def website_from_profile(profile, cluster):
    """Create and persist a Website record populated from *profile*.

    Copies the account key, return URL, display name and site URL off the
    profile, attaches the given cluster, saves the new row and returns it.
    """
    site = Website(
        account_key=profile.account_key,
        return_url=profile.return_url,
        website_name=profile.website_name,
        website_url=profile.website,
        cluster=cluster,
    )
    site.save()
    return site
def delete(self):
    """Delete this customer's website row.

    Returns a success message when a row was removed, or a
    "does not exist" message when no website with this URL exists.
    """
    try:
        Website.get(url=self.url)
    except DoesNotExist:
        # Original wrapped this in `if DoesNotExist:` — a class object is
        # always truthy, so the test was dead code.
        return {"message": "Website does not exist"}
    # peewee expressions must be combined with the bitwise & operator; the
    # original used Python's `and`, which collapses the filter to only the
    # second condition (URL), ignoring the customer entirely.
    deleted = (Website.delete()
               .where((Website.customer == self.customer) &
                      (Website.url == self.url))
               .execute())
    if deleted:
        return {"message": "Website deleted successfully"}
async def test_site(website: models.Website) -> str:
    """Probe *website* over HTTP and return a short status string.

    Returns the HTTP status code as a string on a normal response,
    ``"Invalid Certificate (<code>)"`` when the site only answers with SSL
    verification disabled, or a human-readable error marker when the site
    is unreachable or an unexpected error occurs.
    """
    site_status: str = ''
    # Browser-like User-Agent headers so a bot block does not make us
    # report false errors.
    user_agent_list = [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:77.0) Gecko/20100101 Firefox/77.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    ]
    # Pick a random user agent and build the request headers.
    user_agent = random.choice(user_agent_list)
    headers = {'User-Agent': user_agent}
    try:
        try:
            # --- send a request to the site and log the response
            response = requests.get(website.get_url(), headers=headers)
            if response.status_code == 200:
                msg = f"site {website.name} ({website.get_url()}) online with 200 response"
                log.info(msg=msg)
            else:
                # "Warnign" typo in the original message fixed.
                msg = f"Warning: site {website.name} ({website.get_url()}) online but has a {response.status_code} response"
                log.warning(msg)
            site_status = str(response.status_code)
        except requests.exceptions.SSLError as e:
            # log security error
            msg: str = f"{site_status} on site '{website.name}' ({website.get_url()}):\n\t{e}\n"
            log.error(msg, exc_info=False)
            # --- Retry the connection without verifying the SSL certificate
            session: requests.Session = requests.Session()
            session.verify = False
            response = session.get(website.get_url(), headers=headers)
            if response.status_code == 200:
                # logging's warn() is deprecated; warning() is the supported API.
                msg = f"Warning: site {website.name} ({website.get_url()}) online with 200 response and invalid certificate"
                log.warning(msg)
            else:
                msg = f"Warning: site {website.name} ({website.get_url()}) online but has a {response.status_code} response invalid certificate"
                log.warning(msg)
            site_status = f"Invalid Certificate ({response.status_code})"
    except requests.exceptions.ConnectionError as e:
        site_status = 'Unable to connect'
        # log connection error
        msg: str = f'{site_status} site {website.name} ({website.get_url()}):\n\t{e}\n'
        log.error(msg, exc_info=True)
    except Exception as e:
        site_status = 'Unknown Error checking page'
        # log the unexpected exception
        msg: str = f'Unknown exception checking site {website.name} ({website.get_url()}):\n\t{e}\n'
        log.error(msg, exc_info=True)
    # The original returned from a `finally` block, which silently swallows
    # any in-flight BaseException (KeyboardInterrupt, CancelledError). The
    # except clauses above already guarantee site_status is set on every
    # handled path, so a plain return is equivalent and safer.
    return site_status
def update(self):
    """Update this customer's website row, keyed by URL.

    Returns a success message, or a "does not exist" message when no
    website with this URL exists.
    """
    try:
        Website.get(url=self.url)
    except DoesNotExist:
        # `if DoesNotExist:` in the original was always true (class objects
        # are truthy) — dead code removed.
        return {"message": "Website does not exist"}
    # The original built the UPDATE query but never ran it: peewee queries
    # are lazy until .execute() is called, so nothing was ever written.
    # The .returning(Website) clause is dropped — its result was discarded
    # anyway, and RETURNING is unsupported on non-Postgres backends.
    (Website.update(url=self.url)
            .where(Website.customer == self.customer)
            .execute())
    # "Wesite" typo in the success message fixed.
    return {"message": "Website updated successfully"}
def create(self):
    """Create a website for this customer, enforcing the plan's limit.

    Validates the URL, checks that the customer exists and that the
    subscription is active (renewal_date set), and limits the number of
    websites to the plan quantity. A plan quantity of 0 means unlimited.
    Returns a dict with a human-readable "message" in every case.
    """
    if not re.match(self.pattern, self.url, flags=0):
        return {"message": "Invalid website"}
    try:
        customer = Customer.get(id=self.customer)
    except DoesNotExist:
        return {"message": "Customer does not exist"}
    # No renewal date means the subscription has lapsed.
    if customer.renewal_date is None:
        return {"message": "Sorry, your plan has expired"}
    website_count = Website.select().where(
        Website.customer == customer.id).count()
    # quantity == 0 is the "unlimited" sentinel; otherwise the customer
    # must still be under their plan's website quota. The two identical
    # create branches of the original are merged.
    if customer.plan.quantity == 0 or customer.plan.quantity > website_count:
        website = Website(url=self.url, customer=self.customer)
        website.save()
        return {"message": "Website created successfully"}
    return {
        "message": "Sorry, you can't add more websites, your have exceeded your subscription limit"
    }
def add_websites(db):
    """Seed nine well-known websites and record each id under a
    "website_<n>_id" key, mirroring the original one-by-one inserts."""
    urls = [
        "www.google.com", "www.ebay.com", "www.amazon.com",
        "www.github.com", "www.reddit.com", "www.facebook.com",
        "www.twitter.com", "www.bing.com", "www.youtube.com",
    ]
    # Create every website first, then register the keys, preserving the
    # original side-effect ordering (all creates before all key adds).
    created = [Website.create_and_add(id=n, url=u)
               for n, u in enumerate(urls, start=1)]
    for n, site in enumerate(created, start=1):
        keys.add(f"website_{n}_id", site.id)
def post(self):
    """Create a new website record; restricted to root administrators."""
    current = g.user
    # Only system administrators may create websites.
    if not current.is_root:
        return {'status': 400, 'message': '需要系统管理员权限'}, 400
    name = request.json.get('name', None)
    # Require a non-empty name of at most 40 characters.
    if not name or len(name) > 40:
        return {'status': 400, 'message': '请提供正确的网站名称'}, 400
    site = Website(name=name)
    db.session.add(site)
    db.session.commit()
    return site.to_dict(), 201
def populate_topsites(num=100):
    """Fetch the top *num* Alexa sites and upsert them into the database.

    Rows are matched by URL: existing ones are updated in place, new URLs
    get a fresh Website row. A single commit is issued at the end.
    """
    sites = alexa.topsites(num)
    for s in sites:
        q = db.session.query(Website).filter_by(url=s['url'])
        w = q.first()
        if not w:
            w = Website(url=s['url'])
        w.global_rank = s['global_rank']
        w.reach_per_million = s['reach_per_million']
        w.page_views_per_million = s['page_views_per_million']
        w.page_views_per_user = s['page_views_per_user']
        # Single-argument print() calls are valid on both Python 2 and 3;
        # the original `print "..."` statements are a syntax error on py3.
        print("Adding %s" % w.url)
        db.session.add(w)
    db.session.commit()
    print("Finished updating topsites ====================\n")
def main():
    """Breadth-first crawl of .bg sites starting at register.start.bg,
    recording each site's URL and Server header in the database."""
    session = Session()
    start_url = "https://register.start.bg"
    queue = [start_url]
    visited = [start_url]
    while queue:
        current_url = queue.pop(0)
        # Renamed from `re`, which shadowed the stdlib re module.
        resp = requests.get(current_url, timeout=10)
        # NOTE(review): raises KeyError for servers that omit the Server
        # header — confirm whether that can happen for these sites.
        server = resp.headers['Server']
        website = Website(url=current_url, server=server)
        existing = session.query(Website).filter(
            Website.url == current_url).first()
        if existing is None:
            session.add(website)
            session.commit()
        try:
            doc_html = resp.content.decode('utf-8')
        except UnicodeDecodeError:
            # The original `pass`ed here and went on to parse a stale (or,
            # on the first page, unbound) doc_html; skip this page instead.
            continue
        soup = BeautifulSoup(doc_html, 'html.parser')
        for link in soup.find_all('a'):
            href = link.get('href')
            # get() yields None for anchor-less links; the original wrapped
            # it in str() first, so its None check could never fire.
            if href is None:
                continue
            site = str(href)
            # "https://..." already starts with "http", so one check suffices.
            if site.startswith("http") and not site.startswith('#'):
                if '.bg' in site and site not in visited:
                    print(f' ')
                    print(site)
                    queue.append(site)
                    visited.append(site)
def manage_websites():
    """List, create, edit and delete websites for a signed-in user.

    Mode is selected by query-string args: ``edit=<domain>`` switches a
    POST to cost-update mode, ``delete=<domain>`` makes a GET remove that
    website. NOTE(review): performing deletes on GET requests is not
    CSRF-safe — confirm this is intentional.
    """
    form = WebsiteForm()
    # filter_by() with no arguments simply yields the full query.
    websites = Website.query.filter_by()
    delete = request.args.get('delete', 'false')
    edit = request.args.get('edit', 'false')
    if 'email' not in session:
        # Not signed in: show the sign-in placeholder page.
        return render_template('pages/placeholder.notsignin.html')
    else:
        if request.method == 'POST':
            if edit != 'false':
                # Edit mode: update the cost of the named website.
                website = Website.query.filter_by(domain_name=edit).first()
                website.cost = form.cost.data if form.cost.data else 0
                db.session.commit()
                return redirect(url_for('manage_websites'))
            if not form.validate():
                # Invalid form: re-render with validation errors.
                return render_template('pages/placeholder.websites.html',
                                       websites=websites, form=form)
            else:
                # Create mode: add a new website with an optional cost.
                cost = form.cost.data if form.cost.data else 0
                new_website = Website(form.domain_name.data, cost)
                db.session.add(new_website)
                db.session.commit()
                return redirect(url_for('manage_websites', edit=edit))
        elif request.method == 'GET':
            if delete != 'false':
                # Delete mode (GET): remove the named website.
                Website.query.filter_by(domain_name=delete).delete()
                db.session.commit()
            return render_template('pages/placeholder.websites.html',
                                   websites=websites, form=form, edit=edit)
def survey_3(): g.user = current_user # get folders for which this user is a member # todo: and for which a survey has not been done this year sites = Website.query.filter_by(PI_username=current_user.uid_trim()).all() choices = [] for f in sites: found = 1 for s in f.surveys.all(): if s.year == datetime.datetime.utcnow().year: found = 0 #so dont include this one break if found: choices.append(f.site_name) choices.append('Other') if Survey1.has_been_done_by(current_user.uid_trim(),datetime.datetime.utcnow().year)[0] \ and Survey2.has_been_done_by(current_user.uid_trim(),datetime.datetime.utcnow().year)[0]: form = Survey3Form(request.form) if form.validate_on_submit(): survey = Survey3(current_user.uid_trim(), alt_email="not a real email") form.populate_obj(survey) survey.site = request.form.get('site_name') if Website.query.filter_by( site_name=request.form.get('site_name')).count() == 0: website = Website(current_user.uid_trim(), request.form.get('other_site'), request.form.get('url')) db.session.add(website) db.session.commit() else: website = Website.query.filter_by( site_name=request.form.get('site_name')).first() survey.website_id = website.id db.session.add(survey) db.session.commit() return redirect(url_for('index')) elif request.form.get('has_site') == 'N': survey = Survey3(current_user.uid_trim(), alt_email="none required") db.session.add(survey) db.session.commit() return redirect(url_for('index')) return render_template('survey/Survey3.html', title='Survey', form=form, sitefield=choices) else: return redirect(url_for('index'))
def add_website(website_name, website_type):
    """Persist a website of type "M" or "E"; reject any other type.

    Returns a (json_body, status_code) tuple: 200 on success, 401 on
    an invalid type or a save failure.
    """
    # Membership test replaces the chained == comparisons.
    if website_type not in ("M", "E"):
        return json.dumps({"message": "AddingWebsiteFailed"}), 401
    try:
        Website(website_name=website_name, website_type=website_type).save()
        return json.dumps({"message": "AddingWebsiteSuccesful"}), 200
    except Exception:
        # Narrowed from a bare `except:`, which also swallowed SystemExit
        # and KeyboardInterrupt.
        return json.dumps({"message": "AddingWebsiteFailed"}), 401
def add(request):
    """Create (or replace) a Website from POST data and attach keywords.

    An existing row with the same URL is deleted and re-created, which is
    cheaper than diffing each field. Keywords are get-or-created and
    linked to the website.
    """
    name = request.POST['name']
    url = request.POST['url']
    number = request.POST['number']
    keywords = request.POST['words']
    # A QuerySet never compares equal to [], so the original `w1 != []`
    # test was always true, and assigning name/number onto the queryset
    # was a no-op. exists() expresses the intended "is there an old row?"
    # check; the old row is simply deleted before re-insertion.
    existing = Website.objects.filter(url=url)
    if existing.exists():
        existing.delete()
    # Now we add the website to the database.
    w1 = Website(name=name, url=url, number=number)
    w1.save()
    # Strip blank space and split the raw keyword string into a list.
    keywords = stl(keywords)
    # Get-or-create each Keyword and attach it to the Website.
    for keyword in keywords:
        try:
            k1 = Keyword.objects.get(name=keyword)
        except Keyword.DoesNotExist:
            # Create the new Keyword and save it.
            k1 = Keyword(name=keyword)
            k1.save()
        # Add this word to the Website (hoisted out of the try/except —
        # it runs on both paths).
        w1.words.add(k1)
def test_clean_website_url(self):
    """clean_website_url should normalise scheme and trailing-slash
    variants so each resolves to the stored website row."""
    owner = User.objects.create_user(username="******", password='******')
    owner.save()
    cluster = WebsiteCluster(creator=owner)
    cluster.save()
    stored = Website(website_url="http://website.com/", cluster=cluster)
    stored.save()
    for variant in ("http://website.com", "https://website.com/"):
        cleaned = clean_website_url(variant)
        match = Website.objects.get(website_url__contains=cleaned)
        self.assertTrue(match is not None)
def success(request):
    """Record uploaded ads against a (possibly new) website, then confirm."""
    site_name = request.POST['website_name']
    site_url = request.POST['website_url']
    # Reuse the website row when one exists for this URL, else create it.
    try:
        website = Website.objects.get(url=site_url)
    except Website.DoesNotExist:
        website = Website(name=site_name, url=site_url)
        website.save()
    # Persist one Ad per serialized entry in the POSTed JSON payload.
    for entry in json.loads(request.POST['ads']):
        fields = entry['fields']
        record = Ad(name=fields['name'],
                    age=fields['age'],
                    ethnicity=fields['ethnicity'],
                    phone_number=fields['phone_number'],
                    location=fields['location'],
                    ad=fields['ad'],
                    date=fields['date'],
                    website=website)
        record.save()
    return HttpResponse("Success! <a href=\"/upload/\">Add More</a> <a href=\"/\">Home</a>")
def website_upload():
    """Ingest a scan result: upsert the website row and its technologies.

    Expects a JSON body with hostname/port/title/ipaddress/geo and a list
    of technologies ({title, category, [url], [detail]}). Returns a JSON
    status describing any missing field, or 'ok'.
    """
    postJson = json.loads(request.data)
    app.logger.debug(postJson)
    # dict.has_key() was removed in Python 3; the `in` operator is the
    # portable spelling throughout this function.
    if 'hostname' not in postJson:
        return jsonify(status='missing hostname')
    technologies = []
    for t in postJson['technologies']:
        if 'title' not in t:
            return jsonify(status='missing technology title')
        if 'category' not in t:
            return jsonify(status='missing technology category')
        # Optional fields default to None.
        t.setdefault('url', None)
        t.setdefault('detail', None)
        # Look for an exactly matching technology record.
        tmpTech = (Technology.query
                   .filter_by(title=t['title'])
                   .filter_by(category=t['category'])
                   .filter_by(detail=t['detail'])
                   .first())
        if tmpTech is None:
            tmpTech = Technology(category=t['category'], title=t['title'],
                                 detail=t['detail'], url=t['url'])
            db.session.add(tmpTech)
        technologies.append(tmpTech)
    upload = Website.query.filter_by(hostname=postJson['hostname'],
                                     port=postJson['port']).first()
    if not upload:
        # First sighting of this hostname:port.
        upload = Website(hostname=postJson['hostname'], port=postJson['port'],
                         title=postJson['title'], ipaddress=postJson['ipaddress'],
                         geo=postJson['geo'], technologies=technologies)
    else:
        # Refresh the existing record and bump its sighting counter.
        upload.last_time = datetime.now()
        upload.title = postJson['title']
        upload.technologies = technologies
        upload.ipaddress = postJson['ipaddress']
        upload.geo = postJson['geo']
        upload.frequency = upload.frequency + 1
    db.session.add(upload)
    db.session.commit()
    return jsonify(status='ok')
def upload(request):
    """Handle the ad-file upload form.

    A POST with a valid form parses the uploaded file against the selected
    (or newly named) website and renders a confirmation page; a GET, or a
    POST with an invalid form, (re)renders the upload form.
    """
    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            f = request.FILES['file']
            website = None
            tmpurl = request.POST['website']
            tmpname = ""
            # "NEWSITE" is the dropdown sentinel for "create a new website".
            if tmpurl == "NEWSITE":
                tmpurl = request.POST['newSiteUrl']
                tmpname = request.POST['newSiteName']
            try:
                website = Website.objects.get(url=tmpurl)
            except Website.DoesNotExist:
                # NOTE(review): the new Website is never save()d in this
                # view — presumably handle_uploaded_file persists it;
                # confirm before relying on the row existing.
                website = Website(name=tmpname, url=tmpurl)
            results = handle_uploaded_file(f, website)
            return render_to_response('confirm.html',
                                      {'ads': results,
                                       "website": website,
                                       "serialized_ads": serializers.serialize("json", results)})
            #return render_to_response('confirm.html', {'ads': results, "website": website, "serialized_ads": json.dumps(results)})
    else:
        form = UploadFileForm()
    # Fall-through: GET, or POST with an invalid form.
    websites = [Website(name="site1", url="site1.com"),
                Website(name="site2", url="site2.org"),
                Website(name="site3", url="site3.edu")]
    return render_to_response('upload.html', {'form': form, 'websites': websites})
def save_info_to_database(self, soup, url):
    """Persist a crawled page: upsert its Website row and attach a Pages row.

    Pages without an og:description or title are assumed to be image links
    and skipped.
    """
    if "DOCTYPE html" in soup:
        print("YES")
    try:
        description = soup.find(property="og:description")["content"]
        title = soup.title.string
    except Exception:
        # Missing og:description/title — treat as a non-HTML (image) link.
        print("Image link")
        return
    # Simple scheme sniff: an https URL counts as SSL-enabled.
    ssl = 1 if "https://" in url else 0
    domain = self.get_page_main_url(url)
    current_time = datetime.now()
    hrefs = self.get_page_hrefs(soup)
    pages_count = len(hrefs)
    # Count links that resolve outside this domain. The original built a
    # throwaway list only to take its length, and called parse_url twice
    # per href (assumes parse_url is side-effect-free — it is a pure URL
    # join by usage here).
    outgoing_hrefs = sum(
        1 for href in hrefs if domain not in self.parse_url(url, href)
    )
    website = Website(title=title, domain=domain, pages_count=pages_count)
    self.session.add(website)
    self.session.commit()
    # Re-query to get the persisted row (with its primary key populated).
    site = self.session.query(Website).filter(
        Website.domain == domain).first()
    site.pages = [
        Pages(date=current_time, url=url, title=title, desc=description,
              ads=outgoing_hrefs, SSL=ssl)
    ]
    self.session.commit()
def website_upload():
    """Ingest a scan result: upsert the website row and its technologies.

    Expects a JSON body with hostname/port/title/ipaddress/geo and a list
    of technologies ({title, category, [url], [detail]}). Returns a JSON
    status describing any missing field, or 'ok'.
    """
    postJson = json.loads(request.data)
    app.logger.debug(postJson)
    # dict.has_key() was removed in Python 3; `in` is the portable spelling.
    if 'hostname' not in postJson:
        return jsonify(status = 'missing hostname')
    technologies = []
    for t in postJson['technologies']:
        if 'title' not in t:
            return jsonify(status = 'missing technology title')
        if 'category' not in t:
            return jsonify(status = 'missing technology category')
        # Optional fields default to None.
        t.setdefault('url', None)
        t.setdefault('detail', None)
        # Look for an exactly matching technology record.
        tmpTech = Technology.query.filter_by(title = t['title']).filter_by(category = t['category']).filter_by(detail = t['detail']).first()
        if tmpTech is None:
            tmpTech = Technology(category = t['category'], title = t['title'], detail = t['detail'], url = t['url'])
            db.session.add(tmpTech)
        technologies.append(tmpTech)
    upload = Website.query.filter_by(hostname = postJson['hostname'], port = postJson['port']).first()
    if not upload:
        # First sighting of this hostname:port.
        upload = Website(hostname = postJson['hostname'], port = postJson['port'], title = postJson['title'], ipaddress = postJson['ipaddress'], geo = postJson['geo'], technologies = technologies)
    else:
        # Refresh the existing record and bump its sighting counter.
        upload.last_time = datetime.now()
        upload.title = postJson['title']
        upload.technologies = technologies
        upload.ipaddress = postJson['ipaddress']
        upload.geo = postJson['geo']
        upload.frequency = upload.frequency + 1
    db.session.add(upload)
    db.session.commit()
    return jsonify(status = 'ok')
def signup(request):
    """Register a new user together with their website.

    On a valid POST: creates the User (type=1) and an activation record,
    scrapes the site's description, favicon (three sizes) and screenshot,
    stores the Website in an unverified state, links user and website,
    and returns a confirmation response. Otherwise renders the signup form.
    """
    if request.method == 'POST':
        form = SignupForm(request.POST)
        if form.is_valid():
            email = form.cleaned_data['email']
            fullname = form.cleaned_data['fullname']
            password = form.cleaned_data['password']
            url = str(form.cleaned_data['url']).lower()
            # creating the fresh user
            # NOTE(review): password is passed through as-is — confirm the
            # User model hashes it; storing plaintext would be a defect.
            user = User(fullname=fullname, email=email, password=password, type=1)
            user.save()
            # creating activation record for the fresh user
            User_Activation(user=user).save()
            description = ''
            res = get_http_response(url)
            # saving website's description, truncated to at most 510 chars
            # (507 plus "..." when long enough to clip)
            if res:
                description = get_site_description(res)[0:510]
                if len(description) >= 507:
                    description = description[0:507] + '...'
            name = extract_website_name(remove_landing_url(url))
            # insert the url to websites table in an unverified state
            # associated to the fresh user -- custom validator made sure that no
            # record with same url is verified yet
            website = Website(url=url, name=name, description=description, type=1)
            website.save()
            # saving website's icon in three normalised sizes
            favicon_url = get_site_favicon_url(remove_landing_url(url)) or ''
            if favicon_url:
                favicon_content = http_read(favicon_url) or ''
                if favicon_content:
                    filename32 = make_random_string(32) + '.png'
                    filename48 = make_random_string(32) + '.png'
                    filename64 = make_random_string(32) + '.png'
                    favicon32 = ContentFile(normalize_img((32, 32), favicon_content))
                    favicon48 = ContentFile(normalize_img((48, 48), favicon_content))
                    favicon64 = ContentFile(normalize_img((64, 64), favicon_content))
                    website.favicon32.save(filename32, favicon32)
                    website.favicon48.save(filename48, favicon48)
                    website.favicon64.save(filename64, favicon64)
            # first site screenshot insertion (stored at half resolution)
            screenshot_name = make_random_string(32) + '.png'
            screenshot_image_content = site_screenshot(url)
            if screenshot_image_content:
                p = ImageFile.Parser()
                p.feed(screenshot_image_content)
                im = p.close()
                screenshot_image_content = ContentFile(normalize_img((im.size[0]/2, im.size[1]/2), screenshot_image_content))
                wi = Website_Image(name=name, website=website)
                wi.image.save(screenshot_name, screenshot_image_content)
            # the actual user-website association creation
            User_Website(user=user, website=website).save()
            response = HttpResponse('Signup successful, please activate via email')
            return response
    # request.method is GET
    else:
        form = SignupForm()  # An unbound form
    return render_form('signup_form.html', form, '', request)
bs = self.getPage(url) else: bs = self.getPage(site.url + url) if bs is None: print("page or url wrong!") return title = self.safeGet(bs, site.titleTag) body = self.safeGet(bs, site.bodyTag) if title != "" and body != "": content = Content(topic, title, body, url) content.print() crawler = Crawler() siteData = [[ "Oreilly Media", 'http://oreilly.com', 'https://ssearch.oreilly.com/?q=', 'article.product-result', 'p.title a', True, 'h1', 'section#product-description' ]] sites = [] for row in siteData: sites.append( Website(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7])) topics = ['python', 'data science'] for topic in topics: print("Get info about: " + topic) for targetSite in sites: crawler.search(topic, targetSite)