def create(self):
    """Validate ``self.url`` and create a Website for ``self.customer``.

    Enforces the customer's subscription plan: a plan ``quantity`` of 0 is
    treated as unlimited, otherwise the customer may not exceed the plan's
    website quota.  A customer with no renewal date is considered expired.

    Returns:
        dict: a single human-readable ``message`` describing the outcome.
    """
    # Reject URLs that do not match the configured pattern up front.
    if not re.match(self.pattern, self.url):
        return {"message": "Invalid website"}
    try:
        customer = Customer.get(id=self.customer)
    except DoesNotExist:
        return {"message": "Customer does not exist"}
    # An expired subscription (no renewal date) blocks any creation.
    if customer.renewal_date is None:
        return {"message": "Sorry, your plan has expired"}
    website_count = Website.select().where(
        Website.customer == customer.id).count()
    # quantity == 0 means unlimited; otherwise enforce the quota.  This
    # collapses the two duplicated create branches of the original.
    if customer.plan.quantity == 0 or website_count < customer.plan.quantity:
        website = Website(url=self.url, customer=self.customer)
        website.save()
        return {"message": "Website created successfully"}
    return {
        "message":
        "Sorry, you can't add more websites, your have exceeded your subscription limit"
    }
def manage_websites():
    """List, create, edit and delete websites for a signed-in user.

    Query-string parameters:
        edit:   domain name of a website whose cost is being updated
                ('false' when not editing).
        delete: domain name of a website to remove ('false' when not
                deleting; handled on GET).

    Renders the websites page, or the not-signed-in placeholder when no
    ``email`` is present in the session.
    """
    form = WebsiteForm()
    websites = Website.query.filter_by()
    delete = request.args.get('delete', 'false')
    edit = request.args.get('edit', 'false')
    if 'email' not in session:
        return render_template('pages/placeholder.notsignin.html')
    if request.method == 'POST':
        if edit != 'false':
            website = Website.query.filter_by(domain_name=edit).first()
            # Guard against a stale/unknown domain name: the original code
            # dereferenced ``website.cost`` and raised AttributeError when
            # no row matched.
            if website is not None:
                website.cost = form.cost.data if form.cost.data else 0
                db.session.commit()
            return redirect(url_for('manage_websites'))
        if not form.validate():
            return render_template('pages/placeholder.websites.html',
                                   websites=websites, form=form)
        cost = form.cost.data if form.cost.data else 0
        new_website = Website(form.domain_name.data, cost)
        db.session.add(new_website)
        db.session.commit()
        return redirect(url_for('manage_websites', edit=edit))
    elif request.method == 'GET':
        # NOTE(review): performing a destructive delete on a GET request is
        # a CSRF / link-prefetch hazard -- consider moving this to POST.
        if delete != 'false':
            Website.query.filter_by(domain_name=delete).delete()
            db.session.commit()
    return render_template('pages/placeholder.websites.html',
                           websites=websites, form=form, edit=edit)
def main():
    """Breadth-first crawl of .bg sites starting from register.start.bg.

    Stores each newly seen URL with its ``Server`` response header in the
    database, then enqueues every not-yet-visited ``.bg`` link found on the
    page.
    """
    from collections import deque

    session = Session()
    start_url = "https://register.start.bg"
    # deque gives O(1) FIFO pops; a set gives O(1) visited checks (only
    # membership is ever needed).
    queue = deque([start_url])
    visited = {start_url}
    while queue:
        current_url = queue.popleft()
        # The original bound the response to ``re``, shadowing the stdlib
        # regex module.
        response = requests.get(current_url, timeout=10)
        # Not every server sends a Server header; default to None instead
        # of raising KeyError.
        server = response.headers.get('Server')
        website = Website(url=current_url, server=server)
        result = session.query(Website).filter(
            Website.url == current_url).first()
        if result is None:
            session.add(website)
            session.commit()
        try:
            doc_html = response.content.decode('utf-8')
        except UnicodeDecodeError:
            # The original ``pass`` left doc_html undefined (NameError on
            # the first page, stale content afterwards); skip the page.
            continue
        soup = BeautifulSoup(doc_html, 'html.parser')
        for link in soup.find_all('a'):
            site = str(link.get('href'))
            # str() never yields None; "https" links already start with
            # "http", so one prefix test suffices.
            if site.startswith("http") and not site.startswith('#'):
                if '.bg' in site and site not in visited:
                    print(f' ')
                    print(site)
                    queue.append(site)
                    visited.add(site)
def website_from_profile(profile, cluster):
    """Create and persist a Website record from an account profile.

    Copies the profile's account key, return URL, display name and URL
    onto a new Website bound to *cluster*, saves it, and returns it.
    """
    site = Website(
        account_key=profile.account_key,
        return_url=profile.return_url,
        website_name=profile.website_name,
        website_url=profile.website,
        cluster=cluster,
    )
    site.save()
    return site
def survey_3(): g.user = current_user # get folders for which this user is a member # todo: and for which a survey has not been done this year sites = Website.query.filter_by(PI_username=current_user.uid_trim()).all() choices = [] for f in sites: found = 1 for s in f.surveys.all(): if s.year == datetime.datetime.utcnow().year: found = 0 #so dont include this one break if found: choices.append(f.site_name) choices.append('Other') if Survey1.has_been_done_by(current_user.uid_trim(),datetime.datetime.utcnow().year)[0] \ and Survey2.has_been_done_by(current_user.uid_trim(),datetime.datetime.utcnow().year)[0]: form = Survey3Form(request.form) if form.validate_on_submit(): survey = Survey3(current_user.uid_trim(), alt_email="not a real email") form.populate_obj(survey) survey.site = request.form.get('site_name') if Website.query.filter_by( site_name=request.form.get('site_name')).count() == 0: website = Website(current_user.uid_trim(), request.form.get('other_site'), request.form.get('url')) db.session.add(website) db.session.commit() else: website = Website.query.filter_by( site_name=request.form.get('site_name')).first() survey.website_id = website.id db.session.add(survey) db.session.commit() return redirect(url_for('index')) elif request.form.get('has_site') == 'N': survey = Survey3(current_user.uid_trim(), alt_email="none required") db.session.add(survey) db.session.commit() return redirect(url_for('index')) return render_template('survey/Survey3.html', title='Survey', form=form, sitefield=choices) else: return redirect(url_for('index'))
def add_website(website_name, website_type):
    """Persist a new website of type "M" or "E".

    Returns:
        tuple: (json body, HTTP status) -- 200 on success, 401 when the
        type is invalid or the save fails.
    """
    # Membership test replaces the original ``== "M" or == "E"`` chain.
    if website_type in ("M", "E"):
        try:
            Website(website_name=website_name,
                    website_type=website_type).save()
            return json.dumps({"message": "AddingWebsiteSuccesful"}), 200
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return json.dumps({"message": "AddingWebsiteFailed"}), 401
    else:
        return json.dumps({"message": "AddingWebsiteFailed"}), 401
def upload(request):
    """Handle the ad-file upload form (Django view).

    POST: validate the form, resolve the target Website (building an
    unsaved one for new sites), parse the uploaded file and render the
    confirmation page with the extracted ads.
    GET (or invalid POST): render the upload form with sample websites.
    """
    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            f = request.FILES['file']
            website = None
            tmpurl = request.POST['website']
            tmpname = ""
            # "NEWSITE" is the sentinel the form posts for "create a new site".
            if tmpurl == "NEWSITE":
                tmpurl = request.POST['newSiteUrl']
                tmpname = request.POST['newSiteName']
            try:
                website = Website.objects.get(url=tmpurl)
            except Website.DoesNotExist:
                # NOTE(review): the new Website is intentionally not saved
                # here; persistence appears to happen after confirmation.
                website = Website(name=tmpname, url=tmpurl)
            results = handle_uploaded_file(f, website)
            return render_to_response(
                'confirm.html',
                {'ads': results,
                 "website": website,
                 "serialized_ads": serializers.serialize("json", results)})
    else:
        form = UploadFileForm()
    websites = [Website(name="site1", url="site1.com"),
                Website(name="site2", url="site2.org"),
                Website(name="site3", url="site3.edu")]
    return render_to_response('upload.html',
                              {'form': form, 'websites': websites})
def website_upload():
    """Ingest a scanned website fingerprint posted as JSON.

    Upserts the Website row keyed by (hostname, port), attaching the
    reported technologies (created on first sight), and bumps the scan
    frequency on re-upload.  Returns a JSON status payload.
    """
    postJson = json.loads(request.data)
    app.logger.debug(postJson)
    # dict.has_key() was removed in Python 3; use the ``in`` operator.
    if 'hostname' not in postJson:
        return jsonify(status='missing hostname')
    technologies = []
    for t in postJson['technologies']:
        if 'title' not in t:
            return jsonify(status='missing technology title')
        if 'category' not in t:
            return jsonify(status='missing technology category')
        if 'url' not in t:
            t['url'] = None
        if 'detail' not in t:
            t['detail'] = None
        # Reuse a technology row only on an exact match of all fields.
        tmpTech = Technology.query.filter_by(title=t['title']).filter_by(
            category=t['category']).filter_by(detail=t['detail']).first()
        if tmpTech is None:
            tmpTech = Technology(category=t['category'], title=t['title'],
                                 detail=t['detail'], url=t['url'])
            db.session.add(tmpTech)
        technologies.append(tmpTech)
    upload = Website.query.filter_by(hostname=postJson['hostname'],
                                     port=postJson['port']).first()
    if not upload:
        upload = Website(hostname=postJson['hostname'],
                         port=postJson['port'],
                         title=postJson['title'],
                         ipaddress=postJson['ipaddress'],
                         geo=postJson['geo'],
                         technologies=technologies)
    else:
        # Existing record: refresh the mutable fields and count the scan.
        upload.last_time = datetime.now()
        upload.title = postJson['title']
        upload.technologies = technologies
        upload.ipaddress = postJson['ipaddress']
        upload.geo = postJson['geo']
        upload.frequency = upload.frequency + 1
    db.session.add(upload)
    db.session.commit()
    return jsonify(status='ok')
def save_info_to_database(self, soup, url):
    """Persist a crawled page: create the Website row and attach a Pages row.

    Extracts the title and OG description from *soup*, derives the page's
    domain, and counts outgoing (off-domain) links.  Pages without OG
    metadata (e.g. direct image links) are skipped.
    """
    if "DOCTYPE html" in soup:
        print("YES")
    try:
        description = soup.find(property="og:description")["content"]
        title = soup.title.string
    except Exception:
        # No OG description / title -- most likely a direct image link.
        print("Image link")
        return
    # startswith() instead of the original substring test, which would
    # also match "https://" appearing later in the URL.
    ssl = 1 if url.startswith("https://") else 0
    domain = self.get_page_main_url(url)
    current_time = datetime.now()
    hrefs = self.get_page_hrefs(soup)
    pages_count = len(hrefs)
    # Count links resolving outside this domain; the original built a
    # throwaway list (calling parse_url twice per href) just for len().
    outgoing_hrefs = sum(
        1 for href in hrefs if domain not in self.parse_url(url, href))
    website = Website(title=title, domain=domain, pages_count=pages_count)
    self.session.add(website)
    self.session.commit()
    site = self.session.query(Website).filter(
        Website.domain == domain).first()
    site.pages = [
        Pages(date=current_time,
              url=url,
              title=title,
              desc=description,
              ads=outgoing_hrefs,
              SSL=ssl)
    ]
    self.session.commit()
def test_clean_website_url(self):
    """clean_website_url should normalize scheme/trailing-slash variants so
    a __contains__ lookup still finds the stored website."""
    user = User.objects.create_user(username="******", password='******')
    user.save()
    cluster = WebsiteCluster(creator=user)
    cluster.save()
    stored = Website(website_url="http://website.com/", cluster=cluster)
    stored.save()
    for raw_url in ("http://website.com", "https://website.com/"):
        cleaned = clean_website_url(raw_url)
        match = Website.objects.get(website_url__contains=cleaned)
        self.assertTrue(match is not None)
def success(request):
    """Persist the confirmed ads posted from the upload flow (Django view).

    Looks up (or creates and saves) the Website named in the POST data,
    then saves one Ad row per serialized ad record, and returns a small
    success page with navigation links.
    """
    name = request.POST['website_name']
    url = request.POST['website_url']
    try:
        website = Website.objects.get(url=url)
    except Website.DoesNotExist:
        website = Website(name=name, url=url)
        website.save()
    for record in json.loads(request.POST['ads']):
        fields = record['fields']
        Ad(name=fields['name'],
           age=fields['age'],
           ethnicity=fields['ethnicity'],
           phone_number=fields['phone_number'],
           location=fields['location'],
           ad=fields['ad'],
           date=fields['date'],
           website=website).save()
    return HttpResponse("Success! <a href=\"/upload/\">Add More</a> <a href=\"/\">Home</a>")
bs = self.getPage(url) else: bs = self.getPage(site.url + url) if bs is None: print("page or url wrong!") return title = self.safeGet(bs, site.titleTag) body = self.safeGet(bs, site.bodyTag) if title != "" and body != "": content = Content(topic, title, body, url) content.print() crawler = Crawler() siteData = [[ "Oreilly Media", 'http://oreilly.com', 'https://ssearch.oreilly.com/?q=', 'article.product-result', 'p.title a', True, 'h1', 'section#product-description' ]] sites = [] for row in siteData: sites.append( Website(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7])) topics = ['python', 'data science'] for topic in topics: print("Get info about: " + topic) for targetSite in sites: crawler.search(topic, targetSite)