Beispiel #1
0
    def create(self):
        """Create a website for the customer if their plan allows it.

        The URL is validated against ``self.pattern`` first, then the customer
        is looked up by ``self.customer``. A customer without a renewal date
        is treated as having an expired plan. A plan quantity of 0 places no
        limit on the number of websites; any other quantity only allows a new
        website while the customer's current website count is below it.

        Returns:
            dict: a single ``"message"`` key describing the outcome.
        """
        if not re.match(self.pattern, self.url, flags=0):
            return {"message": "Invalid website"}

        try:
            customer = Customer.get(id=self.customer)
        except DoesNotExist:
            return {"message": "Customer does not exist"}

        if customer.renewal_date is None:
            return {"message": "Sorry, your plan has expired"}

        quantity = customer.plan.quantity
        if quantity != 0:
            # Only limited plans need the (comparatively costly) count query.
            website_count = Website.select().where(
                Website.customer == customer.id).count()
            if quantity <= website_count:
                return {
                    "message":
                    "Sorry, you can't add more websites, your have exceeded your subscription limit"
                }

        Website(url=self.url, customer=self.customer).save()
        return {"message": "Website created successfully"}
Beispiel #2
0
def manage_websites():
    """View for listing, adding, editing and deleting websites.

    Behaviour is driven by the request method and two query-string
    arguments, each defaulting to the string ``'false'``:

    * ``edit=<domain_name>`` on POST updates that website's cost.
    * ``delete=<domain_name>`` on GET deletes that website.
    * A plain POST validates the form and creates a new website.

    Visitors without ``'email'`` in the session get the "not signed in" page.
    """
    # Check sign-in before building the form or the query.
    if 'email' not in session:
        return render_template('pages/placeholder.notsignin.html')

    form = WebsiteForm()
    websites = Website.query.filter_by()
    delete = request.args.get('delete', 'false')
    edit = request.args.get('edit', 'false')

    if request.method == 'POST':
        if edit != 'false':
            website = Website.query.filter_by(domain_name=edit).first()
            # first() yields None for an unknown domain; skip the update
            # instead of failing with AttributeError on None.
            if website is not None:
                website.cost = form.cost.data if form.cost.data else 0
                db.session.commit()
            return redirect(url_for('manage_websites'))

        if not form.validate():
            return render_template('pages/placeholder.websites.html',
                                   websites=websites,
                                   form=form)

        cost = form.cost.data if form.cost.data else 0
        new_website = Website(form.domain_name.data, cost)
        db.session.add(new_website)
        db.session.commit()
        return redirect(url_for('manage_websites', edit=edit))

    if request.method == 'GET':
        # NOTE(review): deleting on GET means a crawled or prefetched link
        # can remove data; consider moving this to POST/DELETE.
        if delete != 'false':
            Website.query.filter_by(domain_name=delete).delete()
            db.session.commit()
        return render_template('pages/placeholder.websites.html',
                               websites=websites,
                               form=form,
                               edit=edit)
Beispiel #3
0
def main():
    """Crawl outward from the start URL, storing each page's Server header.

    Pages are visited breadth-first. For every fetched URL that is not yet
    in the database a ``Website`` row with its ``Server`` response header is
    saved. Every absolute ``http``/``https`` link containing ``.bg`` that has
    not been seen before is printed and queued for a later visit.
    """
    from collections import deque

    session = Session()
    start_url = "https://register.start.bg"
    queue = deque([start_url])
    visited = {start_url}  # set: O(1) membership tests

    while queue:
        current_url = queue.popleft()
        try:
            response = requests.get(current_url, timeout=10)
        except requests.RequestException:
            # One unreachable page must not abort the whole crawl.
            continue

        # Not every server sends this header.
        # NOTE(review): confirm the `server` column accepts NULL.
        server = response.headers.get('Server')

        existing = session.query(Website).filter(
            Website.url == current_url).first()
        if existing is None:
            session.add(Website(url=current_url, server=server))
            session.commit()

        # Replace undecodable bytes rather than skipping the decode, which
        # used to leave doc_html unset or holding the previous page's HTML.
        doc_html = response.content.decode('utf-8', errors='replace')
        soup = BeautifulSoup(doc_html, 'html.parser')

        for link in soup.find_all('a'):
            href = link.get('href')
            if href is None:
                continue
            site = str(href)
            # "http" is also a prefix of "https", so one check covers both
            # and already excludes '#fragment' links.
            if site.startswith("http") and '.bg' in site \
                    and site not in visited:
                print('                   ')
                print(site)
                queue.append(site)
                visited.add(site)
Beispiel #4
0
def website_from_profile(profile, cluster):
    """Build a Website from the given profile, save it and return it.

    The account key, return URL, website name and website URL are copied
    from ``profile``; ``cluster`` is attached unchanged.
    """
    fields = {
        "account_key": profile.account_key,
        "return_url": profile.return_url,
        "website_name": profile.website_name,
        "website_url": profile.website,
        "cluster": cluster,
    }
    created = Website(**fields)
    created.save()
    return created
Beispiel #5
0
def survey_3():
    """Render and process the third survey for the current user.

    The site choices offered are the user's websites that have no survey
    recorded for the current UTC year, plus ``'Other'``. The survey is only
    available once Survey1 and Survey2 report as done for this year;
    otherwise the user is redirected to the index page.
    """
    g.user = current_user

    # Offer only the user's sites without a survey for the current year.
    owned_sites = Website.query.filter_by(
        PI_username=current_user.uid_trim()).all()
    choices = []
    for owned in owned_sites:
        surveyed_this_year = any(
            entry.year == datetime.datetime.utcnow().year
            for entry in owned.surveys.all())
        if not surveyed_this_year:
            choices.append(owned.site_name)
    choices.append('Other')

    prerequisites_done = (
        Survey1.has_been_done_by(current_user.uid_trim(),
                                 datetime.datetime.utcnow().year)[0]
        and Survey2.has_been_done_by(current_user.uid_trim(),
                                     datetime.datetime.utcnow().year)[0])
    if not prerequisites_done:
        return redirect(url_for('index'))

    form = Survey3Form(request.form)

    if form.validate_on_submit():
        survey = Survey3(current_user.uid_trim(),
                         alt_email="not a real email")
        form.populate_obj(survey)
        chosen_name = request.form.get('site_name')
        survey.site = chosen_name
        if Website.query.filter_by(site_name=chosen_name).count() != 0:
            website = Website.query.filter_by(site_name=chosen_name).first()
        else:
            # Unknown site name: register it before linking the survey.
            website = Website(current_user.uid_trim(),
                              request.form.get('other_site'),
                              request.form.get('url'))
            db.session.add(website)
            db.session.commit()
        survey.website_id = website.id
        db.session.add(survey)

        db.session.commit()
        return redirect(url_for('index'))

    if request.form.get('has_site') == 'N':
        # The user reports having no site: store a bare survey record.
        survey = Survey3(current_user.uid_trim(), alt_email="none required")
        db.session.add(survey)

        db.session.commit()
        return redirect(url_for('index'))

    return render_template('survey/Survey3.html',
                           title='Survey',
                           form=form,
                           sitefield=choices)
Beispiel #6
0
def add_website(website_name, website_type):
    """Create and save a Website whose type is ``"M"`` or ``"E"``.

    Args:
        website_name: name stored on the new website.
        website_type: must be ``"M"`` or ``"E"``; anything else is rejected
            without attempting to save.

    Returns:
        tuple: ``(json_body, status_code)``. Status 200 with the message
        ``AddingWebsiteSuccesful`` on success; status 401 with the message
        ``AddingWebsiteFailed`` when the type is rejected or saving fails.
    """
    if website_type not in ("M", "E"):
        return json.dumps({"message": "AddingWebsiteFailed"}), 401

    try:
        Website(website_name=website_name, website_type=website_type).save()
    except Exception:
        # Any save failure is reported to the caller, but KeyboardInterrupt
        # and SystemExit are no longer swallowed as a bare `except:` did.
        return json.dumps({"message": "AddingWebsiteFailed"}), 401
    return json.dumps({"message": "AddingWebsiteSuccesful"}), 200
Beispiel #7
0
def upload(request):
    """Handle the ad-file upload form.

    A valid POST resolves the target website (looked up by URL, or built
    unsaved when no match exists), passes the uploaded file to
    ``handle_uploaded_file`` and renders the confirmation page. A non-POST
    request, or a POST with an invalid form, renders the upload page.
    """
    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            uploaded_file = request.FILES['file']
            site_url = request.POST['website']
            site_name = ""
            if site_url == "NEWSITE":
                # The user chose to enter a new site instead of a listed one.
                site_url = request.POST['newSiteUrl']
                site_name = request.POST['newSiteName']
            try:
                website = Website.objects.get(url=site_url)
            except Website.DoesNotExist:
                website = Website(name=site_name, url=site_url)
            results = handle_uploaded_file(uploaded_file, website)
            context = {
                'ads': results,
                "website": website,
                "serialized_ads": serializers.serialize("json", results),
            }
            return render_to_response('confirm.html', context)
    else:
        form = UploadFileForm()

    # Fixed sample sites offered on the upload page.
    sample_sites = (
        ("site1", "site1.com"),
        ("site2", "site2.org"),
        ("site3", "site3.edu"),
    )
    websites = [Website(name=name, url=url) for name, url in sample_sites]
    return render_to_response('upload.html',
                              {'form': form, 'websites': websites})
Beispiel #8
0
def website_upload():
    """Store or refresh a website record from a JSON request body.

    The body must contain ``hostname``; each entry of ``technologies`` must
    contain ``title`` and ``category`` (``url`` and ``detail`` default to
    None). A technology matching an existing row on title, category and
    detail is reused; otherwise a new one is added to the session. The
    website is looked up by hostname and port: a new one is created when
    absent, an existing one has its fields refreshed and its ``frequency``
    incremented.

    Returns:
        A JSON response whose ``status`` is ``'ok'`` or names the missing
        field.
    """
    postJson = json.loads(request.data)
    app.logger.debug(postJson)
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if 'hostname' not in postJson:
        return jsonify(status='missing hostname')

    technologies = []
    for t in postJson['technologies']:
        if 'title' not in t:
            return jsonify(status='missing technology title')
        if 'category' not in t:
            return jsonify(status='missing technology category')
        t.setdefault('url', None)
        t.setdefault('detail', None)

        # Reuse a technology that matches exactly.
        tmpTech = Technology.query.filter_by(title=t['title']).filter_by(
            category=t['category']).filter_by(detail=t['detail']).first()
        if tmpTech is None:
            tmpTech = Technology(category=t['category'],
                                 title=t['title'],
                                 detail=t['detail'],
                                 url=t['url'])
            db.session.add(tmpTech)

        technologies.append(tmpTech)

    upload = Website.query.filter_by(hostname=postJson['hostname'],
                                     port=postJson['port']).first()
    if not upload:
        upload = Website(hostname=postJson['hostname'],
                         port=postJson['port'],
                         title=postJson['title'],
                         ipaddress=postJson['ipaddress'],
                         geo=postJson['geo'],
                         technologies=technologies)
    else:
        # Seen before: refresh the stored details and count the sighting.
        upload.last_time = datetime.now()
        upload.title = postJson['title']
        upload.technologies = technologies
        upload.ipaddress = postJson['ipaddress']
        upload.geo = postJson['geo']
        upload.frequency += 1

    db.session.add(upload)
    db.session.commit()

    return jsonify(status='ok')
Beispiel #9
0
    def save_info_to_database(self, soup, url):
        """Persist a Website row and one Pages row describing ``url``.

        The page needs both an ``og:description`` meta tag and a ``<title>``;
        when either is missing, "Image link" is printed and nothing is
        stored.

        Args:
            soup: parsed document for the page.
            url: address the document was fetched from.
        """
        # NOTE(review): looks like leftover debug output - confirm it can go.
        if "DOCTYPE html" in soup:
            print("YES")

        try:
            description = soup.find(property="og:description")["content"]
            title = soup.title.string
        except (TypeError, KeyError, AttributeError):
            # No og:description tag (None is not subscriptable), no
            # `content` attribute, or no <title> element.
            print("Image link")
            return

        # Test the scheme itself: a plain-http URL can still contain
        # "https://" further along, e.g. in a redirect parameter.
        ssl = 1 if url.startswith("https://") else 0

        domain = self.get_page_main_url(url)

        current_time = datetime.now()

        hrefs = self.get_page_hrefs(soup)
        pages_count = len(hrefs)
        # Count links whose resolved URL lies outside this page's domain,
        # resolving each href only once.
        outgoing_hrefs = sum(
            1 for href in hrefs if domain not in self.parse_url(url, href))

        website = Website(title=title, domain=domain, pages_count=pages_count)
        self.session.add(website)
        self.session.commit()

        # NOTE(review): a Website row is inserted on every call, while the
        # page is attached to the first row found for the domain - confirm
        # whether duplicate rows per domain are intended.
        site = self.session.query(Website).filter(
            Website.domain == domain).first()
        site.pages = [
            Pages(date=current_time,
                  url=url,
                  title=title,
                  desc=description,
                  ads=outgoing_hrefs,
                  SSL=ssl)
        ]
        self.session.commit()
Beispiel #10
0
    def test_clean_website_url(self):
        """Cleaned URL variants must still match the stored website URL."""
        owner = User.objects.create_user(username="******", password='******')
        owner.save()

        cluster = WebsiteCluster(creator=owner)
        cluster.save()

        Website(website_url="http://website.com/", cluster=cluster).save()

        # Variants differing from the stored URL in scheme or trailing slash.
        for variant in ("http://website.com", "https://website.com/"):
            matched = Website.objects.get(
                website_url__contains=clean_website_url(variant))
            self.assertTrue(matched is not None)
Beispiel #11
0
def success(request):
    """Save the confirmed ads for a website and return a success page.

    The website is looked up by the posted URL and created (and saved) when
    it does not exist yet. The posted ``ads`` value is parsed as JSON, and
    one ``Ad`` is saved per entry from that entry's ``fields`` mapping.
    """
    site_name = request.POST['website_name']
    site_url = request.POST['website_url']
    try:
        website = Website.objects.get(url=site_url)
    except Website.DoesNotExist:
        website = Website(name=site_name, url=site_url)
        website.save()

    # Keys copied from each entry's 'fields' mapping onto the Ad.
    ad_field_names = ('name', 'age', 'ethnicity', 'phone_number',
                      'location', 'ad', 'date')
    for entry in json.loads(request.POST['ads']):
        fields = entry['fields']
        values = {key: fields[key] for key in ad_field_names}
        Ad(website=website, **values).save()
    return HttpResponse("Success! <a href=\"/upload/\">Add More</a> <a href=\"/\">Home</a>")
Beispiel #12
0
                bs = self.getPage(url)
            else:
                bs = self.getPage(site.url + url)
            if bs is None:
                print("page or url wrong!")
                return
            title = self.safeGet(bs, site.titleTag)
            body = self.safeGet(bs, site.bodyTag)
            if title != "" and body != "":
                content = Content(topic, title, body, url)
                content.print()


crawler = Crawler()

# One row per site, in the positional order the Website constructor takes.
siteData = [[
    "Oreilly Media", 'http://oreilly.com', 'https://ssearch.oreilly.com/?q=',
    'article.product-result', 'p.title a', True, 'h1',
    'section#product-description'
]]

sites = []
for row in siteData:
    # Each row holds exactly the eight positional constructor arguments.
    sites.append(Website(*row))

# Search every configured site for each topic in turn.
topics = ['python', 'data science']
for topic in topics:
    print(f"Get info about: {topic}")
    for targetSite in sites:
        crawler.search(topic, targetSite)