Esempio n. 1
0
def website_from_profile(profile, cluster):
    """Create, save and return a ``Website`` built from ``profile``.

    The account key, return URL, website name and website URL are copied
    from ``profile``; the new record is attached to ``cluster``.
    """
    fields = {
        'account_key': profile.account_key,
        'return_url': profile.return_url,
        'website_name': profile.website_name,
        # The profile exposes the URL under the attribute ``website``.
        'website_url': profile.website,
        'cluster': cluster,
    }
    record = Website(**fields)
    record.save()
    return record
Esempio n. 2
0
 def delete(self):
     """Delete the website with ``self.url`` that belongs to ``self.customer``.

     Returns:
         A dict with a single ``"message"`` key describing the outcome.
     """
     try:
         # Raises DoesNotExist when no website at all has this URL.
         Website.get(url=self.url)
     except DoesNotExist:
         return {"message": "Website does not exist"}

     # The two conditions must be combined with ``&``. Python's ``and``
     # only tests the first expression for truthiness and returns the
     # second, so the customer filter was silently dropped and any
     # customer's row with this URL would have been deleted.
     deleted = Website.delete().where(
         (Website.customer == self.customer)
         & (Website.url == self.url)).execute()
     if deleted:
         return {"message": "Website deleted successfully"}
     # The URL exists, but not for this customer: nothing was removed.
     return {"message": "Website does not exist"}
Esempio n. 3
0
async def test_site(website: models.Website, timeout: float = 30.0) -> str:
    """Request a website once and return a short status string.

    The site is fetched with a randomly chosen browser User-Agent. If the
    request fails certificate validation it is retried once with
    verification disabled so the site can still be reported as reachable.

    Args:
        website: object providing ``name`` and ``get_url()``.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The HTTP status code as a string, ``"Invalid Certificate (<code>)"``
        when the site only answers without certificate verification,
        ``'Unable to connect'`` on connection errors or timeouts, or
        ``'Unknown Error checking page'`` for any other failure.
    """
    # NOTE(review): ``requests`` is a blocking library, so this coroutine
    # does not yield while the request is in flight.
    site_status: str = ''
    # Browser-like User-Agent headers, to prevent a bot block making us
    # report false errors.
    user_agent_list = [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:77.0) Gecko/20100101 Firefox/77.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    ]
    # Pick a random user agent for this request.
    headers = {'User-Agent': random.choice(user_agent_list)}
    try:
        try:
            # Send a request to the site and log the response. Without a
            # timeout an unresponsive server would block here forever.
            response = requests.get(website.get_url(), headers=headers,
                                    timeout=timeout)
            if response.status_code == 200:
                msg = f"site {website.name} ({website.get_url()}) online with 200 response"
                log.info(msg)
            else:
                msg = f"Warning: site {website.name} ({website.get_url()}) online but has a {response.status_code} response"
                log.warning(msg)
            site_status = str(response.status_code)
        except requests.exceptions.SSLError as e:
            # Log the certificate problem; no status is known yet, so the
            # message names the error instead of an empty status.
            msg = f"SSL error on site '{website.name}' ({website.get_url()}):\n\t{e}\n"
            log.error(msg, exc_info=False)
            # Retry without verifying the SSL certificate. The session is
            # a context manager so its connections are released.
            with requests.Session() as session:
                session.verify = False
                response = session.get(website.get_url(), headers=headers,
                                       timeout=timeout)
            if response.status_code == 200:
                msg = f"Warning: site {website.name} ({website.get_url()}) online with 200 response and invalid certificate"
            else:
                msg = f"Warning: site {website.name} ({website.get_url()}) online but has a {response.status_code} response invalid certificate"
            # ``Logger.warn`` is a removed alias; ``warning`` is the API.
            log.warning(msg)
            site_status = f"Invalid Certificate ({response.status_code})"
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout) as e:
        site_status = 'Unable to connect'
        # Log the connection error.
        msg = f'{site_status} site {website.name} ({website.get_url()}):\n\t{e}\n'
        log.error(msg, exc_info=True)
    except Exception as e:
        site_status = 'Unknown Error checking page'
        # Log the exception.
        msg = f'Unknown exception checking site {website.name} ({website.get_url()}):\n\t{e}\n'
        log.error(msg, exc_info=True)
    # Returned after the try statement rather than from ``finally``: a
    # ``return`` in ``finally`` would also swallow task cancellation and
    # KeyboardInterrupt.
    return site_status
Esempio n. 4
0
 def update(self):
     """Update the customer's website rows to ``self.url``.

     Returns:
         A dict with a single ``"message"`` key describing the outcome.
     """
     try:
         # Raises DoesNotExist when no website has this URL.
         Website.get(url=self.url)
     except DoesNotExist:
         return {"message": "Website does not exist"}

     # A peewee query is lazy: without ``execute()`` no UPDATE statement
     # is ever sent, so the original call changed nothing.
     # NOTE(review): the WHERE clause matches every website owned by the
     # customer, so all of them receive ``self.url`` - confirm that this
     # is the intended scope.
     Website.update(url=self.url).where(
         Website.customer == self.customer).execute()
     return {"message": "Website updated successfully"}
Esempio n. 5
0
    def create(self):
        """Add ``self.url`` as a website of the customer, within plan limits.

        The URL must match ``self.pattern`` and the customer must exist.
        A customer without a renewal date is treated as expired. A plan
        quantity of 0 places no limit on the number of websites; otherwise
        the customer's current website count must be below the quantity.

        Returns:
            A dict with a single ``"message"`` key describing the outcome.
        """
        if not re.match(self.pattern, self.url, flags=0):
            return {"message": "Invalid website"}

        try:
            customer = Customer.get(id=self.customer)
        except DoesNotExist:
            return {"message": "Customer does not exist"}

        website_count = Website.select().where(
            Website.customer == customer.id).count()
        # Read the plan before looking at the renewal date so the order of
        # attribute access stays the same as before.
        quota = customer.plan.quantity

        if customer.renewal_date is None:
            return {"message": "Sorry, your plan has expired"}

        if quota == 0 or quota > website_count:
            Website(url=self.url, customer=self.customer).save()
            return {"message": "Website created successfully"}

        return {
            "message":
            "Sorry, you can't add more websites, your have exceeded your subscription limit"
        }
Esempio n. 6
0
def add_websites(db):
    """Create nine websites with ids 1 to 9 and record their ids in ``keys``.

    Each id is stored under the key ``website_<n>_id``. The ``db``
    argument is accepted but not used by this function.
    """
    urls = (
        "www.google.com",
        "www.ebay.com",
        "www.amazon.com",
        "www.github.com",
        "www.reddit.com",
        "www.facebook.com",
        "www.twitter.com",
        "www.bing.com",
        "www.youtube.com",
    )
    # First pass: create every website, in order.
    created = [
        Website.create_and_add(id=number, url=url)
        for number, url in enumerate(urls, start=1)
    ]
    # Second pass: register the ids once all websites exist.
    for number, website in enumerate(created, start=1):
        keys.add("website_%d_id" % number, website.id)
Esempio n. 7
0
 def post(self):
     """Create a website from the JSON body; only root users may do so.

     The body must be a JSON object whose ``name`` is non-empty and at
     most 40 characters long.

     Returns:
         ``(website.to_dict(), 201)`` on success, otherwise an error
         payload with status 400.
     """
     _user = g.user
     if not _user.is_root:
         # Message: system administrator privileges are required.
         return {'status': 400, 'message': '需要系统管理员权限'}, 400
     # ``request.json`` may be None (no JSON body) or a non-object such
     # as a list; treat both as an empty payload so the request gets the
     # validation error below instead of an unhandled exception.
     _payload = request.json
     if not isinstance(_payload, dict):
         _payload = {}
     _name = _payload.get('name', None)
     if not _name or len(_name) > 40:
         # Message: please provide a valid website name.
         return {'status': 400, 'message': '请提供正确的网站名称'}, 400
     _website = Website(name=_name)
     db.session.add(_website)
     db.session.commit()
     return _website.to_dict(), 201
Esempio n. 8
0
def populate_topsites(num=100):
    """Insert or refresh the top ``num`` sites returned by ``alexa.topsites``.

    A site already stored under the same URL is updated in place; all
    changes are committed together after the loop.
    """
    metric_fields = (
        'global_rank',
        'reach_per_million',
        'page_views_per_million',
        'page_views_per_user',
    )
    for s in alexa.topsites(num):
        w = db.session.query(Website).filter_by(url=s['url']).first()
        if not w:
            w = Website(url=s['url'])
        for field in metric_fields:
            setattr(w, field, s[field])
        # Single-argument call form: valid on Python 3 and prints the
        # same text on Python 2, unlike the ``print`` statement.
        print("Adding %s" % w.url)
        db.session.add(w)
    db.session.commit()
    print("Finished updating topsites ====================\n")
Esempio n. 9
0
def main():
    """Crawl breadth-first from register.start.bg and store each server.

    Every fetched URL not yet in the database is saved as a ``Website``
    together with its ``Server`` response header. Absolute links whose
    text contains ``.bg`` are queued, each at most once.
    """
    from collections import deque

    session = Session()
    start_url = "https://register.start.bg"
    # deque gives O(1) pops from the left; the set gives O(1) membership.
    queue = deque([start_url])
    visited = {start_url}

    while queue:
        current_url = queue.popleft()
        try:
            response = requests.get(current_url, timeout=10)
        except requests.exceptions.RequestException as exc:
            # One unreachable page must not abort the whole crawl.
            print(f'Skipping {current_url}: {exc}')
            continue
        # Not every server sends a Server header; store None in that case.
        server = response.headers.get('Server')

        result = session.query(Website).filter(
            Website.url == current_url).first()
        if result is None:
            session.add(Website(url=current_url, server=server))
            session.commit()

        try:
            doc_html = response.content.decode('utf-8')
        except UnicodeDecodeError:
            # Skip pages that are not valid UTF-8 instead of parsing the
            # previous page's HTML again (or an undefined name).
            continue

        soup = BeautifulSoup(doc_html, 'html.parser')

        for link in soup.find_all('a'):
            site = link.get('href')
            # Anchors without href, fragments and relative links are
            # ignored; "http" is also a prefix of "https".
            if not site or not site.startswith("http"):
                continue
            if '.bg' in site and site not in visited:
                print('                   ')
                print(site)
                queue.append(site)
                visited.add(site)
Esempio n. 10
0
def manage_websites():
    """List, add, edit and delete websites for a signed-in user.

    Query parameters ``edit`` and ``delete`` carry a domain name, or the
    string ``'false'`` when unset. A POST with ``edit`` updates that
    website's cost; any other POST validates the form and creates a
    website. A GET with ``delete`` removes the website before the list
    is rendered.
    """
    form = WebsiteForm()
    websites = Website.query.filter_by()
    delete = request.args.get('delete', 'false')
    edit = request.args.get('edit', 'false')

    if 'email' not in session:
        return render_template('pages/placeholder.notsignin.html')

    if request.method == 'POST':
        if edit != 'false':
            website = Website.query.filter_by(domain_name=edit).first()
            # ``first()`` returns None for an unknown domain; skip the
            # update instead of failing on ``None.cost``.
            if website is not None:
                website.cost = form.cost.data if form.cost.data else 0
                db.session.commit()
            return redirect(url_for('manage_websites'))

        if not form.validate():
            return render_template('pages/placeholder.websites.html',
                                   websites=websites,
                                   form=form)

        cost = form.cost.data if form.cost.data else 0
        new_website = Website(form.domain_name.data, cost)
        db.session.add(new_website)
        db.session.commit()
        return redirect(url_for('manage_websites', edit=edit))

    # Deletion stays limited to GET. Any other non-POST method (e.g. HEAD)
    # now renders the list instead of returning None.
    if request.method == 'GET' and delete != 'false':
        Website.query.filter_by(domain_name=delete).delete()
        db.session.commit()
    return render_template('pages/placeholder.websites.html',
                           websites=websites,
                           form=form,
                           edit=edit)
Esempio n. 11
0
def survey_3():
    """Show and process Survey 3 for the current user.

    Users who have not completed Survey 1 and Survey 2 for the current
    UTC year are redirected to the index. Otherwise the form is rendered
    with the user's sites that have no survey yet this year, plus
    ``'Other'``. A valid submission stores the survey, creating the
    website first when the submitted site name is unknown.
    """
    g.user = current_user
    uid = current_user.uid_trim()
    # Evaluate the year once so every check in this request agrees, even
    # if the request straddles New Year.
    this_year = datetime.datetime.utcnow().year

    # Check the prerequisites first: no site queries are needed when the
    # user is only going to be redirected.
    if not (Survey1.has_been_done_by(uid, this_year)[0]
            and Survey2.has_been_done_by(uid, this_year)[0]):
        return redirect(url_for('index'))

    # get folders for which this user is a member
    # todo: and for which a survey has not been done this year
    sites = Website.query.filter_by(PI_username=uid).all()
    choices = [
        site.site_name for site in sites
        if not any(s.year == this_year for s in site.surveys.all())
    ]
    choices.append('Other')

    form = Survey3Form(request.form)

    if form.validate_on_submit():
        survey = Survey3(uid, alt_email="not a real email")
        form.populate_obj(survey)
        site_name = request.form.get('site_name')
        survey.site = site_name
        # One lookup instead of ``count()`` followed by ``first()``.
        website = Website.query.filter_by(site_name=site_name).first()
        if website is None:
            website = Website(uid,
                              request.form.get('other_site'),
                              request.form.get('url'))
            db.session.add(website)
            # Commit now so the new website has an id to reference.
            db.session.commit()
        survey.website_id = website.id
        db.session.add(survey)

        db.session.commit()
        return redirect(url_for('index'))

    if request.form.get('has_site') == 'N':
        survey = Survey3(uid, alt_email="none required")
        db.session.add(survey)

        db.session.commit()
        return redirect(url_for('index'))

    return render_template('survey/Survey3.html',
                           title='Survey',
                           form=form,
                           sitefield=choices)
Esempio n. 12
0
def add_website(website_name, website_type):
    """Save a website of type ``"M"`` or ``"E"``.

    Returns:
        A ``(json_body, status)`` tuple: status 200 when the website was
        saved, 401 when the type is not accepted or saving failed.
    """
    # NOTE(review): 401 is kept for failures because callers may rely on it.
    failure = json.dumps({"message": "AddingWebsiteFailed"}), 401
    if website_type not in ("M", "E"):
        return failure
    try:
        Website(website_name=website_name,
                website_type=website_type).save()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so SystemExit and
        # KeyboardInterrupt are no longer swallowed.
        return failure
    return json.dumps({"message": "AddingWebsiteSuccesful"}), 200
Esempio n. 13
0
def add(request):
    """Create or replace a website and attach its keywords.

    Reads ``name``, ``url``, ``number`` and ``words`` from the POST data.
    Any existing website with the same URL is deleted and a new record is
    saved in its place. The keyword string is split by ``stl`` and each
    keyword is linked to the website, being created if it does not exist.
    """
    # NOTE(review): no response object is returned in the visible code.
    name = request.POST['name']
    url = request.POST['url']
    number = request.POST['number']
    keywords = request.POST['words']

    # The previous entry for this URL is deleted rather than updated, so
    # there is no need to work out which fields changed. Deleting an
    # empty queryset is a no-op, so no existence check is needed (the
    # former ``w1 != []`` comparison was always true for a queryset).
    Website.objects.filter(url=url).delete()

    # Now add the website to the database.
    w1 = Website(name=name, url=url, number=number)
    w1.save()

    # ``stl`` turns the submitted string into a list of keywords.
    for keyword in stl(keywords):
        # Reuse the keyword if it already exists, otherwise create it.
        k1, _created = Keyword.objects.get_or_create(name=keyword)
        w1.words.add(k1)
Esempio n. 14
0
    def test_clean_website_url(self):
        """Cleaned variants of a stored URL must still find that website."""
        creator = User.objects.create_user(username="******", password='******')
        creator.save()

        site_cluster = WebsiteCluster(creator=creator)
        site_cluster.save()

        stored = Website(website_url="http://website.com/",
                         cluster=site_cluster)
        stored.save()

        # Same site without the trailing slash, and over https.
        for candidate in ("http://website.com", "https://website.com/"):
            cleaned = clean_website_url(candidate)
            # ``get`` raises if there is no match or more than one.
            found = Website.objects.get(website_url__contains=cleaned)
            self.assertTrue(found is not None)
Esempio n. 15
0
def success(request):
    """Save the confirmed ads for a website and return a success page.

    The website is looked up by the posted ``website_url`` and created
    from ``website_name`` when missing. ``ads`` is a JSON list whose
    entries carry the ad attributes under a ``fields`` key.
    """
    site_name = request.POST['website_name']
    site_url = request.POST['website_url']
    try:
        website = Website.objects.get(url=site_url)
    except Website.DoesNotExist:
        website = Website(name=site_name, url=site_url)
        website.save()

    ad_attributes = ('name', 'age', 'ethnicity', 'phone_number',
                     'location', 'ad', 'date')
    for entry in json.loads(request.POST['ads']):
        values = {key: entry['fields'][key] for key in ad_attributes}
        Ad(website=website, **values).save()

    return HttpResponse("Success! <a href=\"/upload/\">Add More</a> <a href=\"/\">Home</a>")
Esempio n. 16
0
def website_upload():
    """Store or refresh a scanned website and its detected technologies.

    The request body is JSON with ``hostname``, ``port``, ``title``,
    ``ipaddress``, ``geo`` and a ``technologies`` list. Each technology
    needs ``title`` and ``category``; ``url`` and ``detail`` default to
    None. A website already known by hostname and port is updated and
    its ``frequency`` incremented, otherwise a new one is created.

    Returns:
        A JSON response whose ``status`` is ``'ok'`` or names the missing
        field.
    """
    postJson = json.loads(request.data)
    app.logger.debug(postJson)
    # ``dict.has_key`` no longer exists on Python 3; ``in`` works on both.
    if 'hostname' not in postJson:
        return jsonify(status='missing hostname')

    technologies = []
    for t in postJson['technologies']:
        if 'title' not in t:
            return jsonify(status='missing technology title')
        if 'category' not in t:
            return jsonify(status='missing technology category')
        t.setdefault('url', None)
        t.setdefault('detail', None)

        # Reuse a technology that matches exactly on title, category
        # and detail.
        tmpTech = Technology.query.filter_by(title=t['title']).filter_by(
            category=t['category']).filter_by(detail=t['detail']).first()
        if tmpTech is None:
            tmpTech = Technology(category=t['category'],
                                 title=t['title'],
                                 detail=t['detail'],
                                 url=t['url'])
            db.session.add(tmpTech)

        technologies.append(tmpTech)

    upload = Website.query.filter_by(hostname=postJson['hostname'],
                                     port=postJson['port']).first()
    if not upload:
        upload = Website(hostname=postJson['hostname'],
                         port=postJson['port'],
                         title=postJson['title'],
                         ipaddress=postJson['ipaddress'],
                         geo=postJson['geo'],
                         technologies=technologies)
    else:
        upload.last_time = datetime.now()
        upload.title = postJson['title']
        upload.technologies = technologies
        upload.ipaddress = postJson['ipaddress']
        upload.geo = postJson['geo']
        upload.frequency = upload.frequency + 1

    db.session.add(upload)
    db.session.commit()

    return jsonify(status='ok')
Esempio n. 17
0
def upload(request):
    """Accept an uploaded ads file and show a confirmation page.

    A valid POST parses the file with ``handle_uploaded_file`` against
    the chosen website (or an unsaved new one when ``NEWSITE`` is
    selected) and renders ``confirm.html``. Any other request, or an
    invalid form, renders ``upload.html``.
    """
    if request.method != 'POST':
        form = UploadFileForm()
    else:
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            uploaded = request.FILES['file']
            site_url = request.POST['website']
            site_name = ""
            if site_url == "NEWSITE":
                site_url = request.POST['newSiteUrl']
                site_name = request.POST['newSiteName']
            try:
                website = Website.objects.get(url=site_url)
            except Website.DoesNotExist:
                # Not saved here; the instance is only handed on.
                website = Website(name=site_name, url=site_url)
            results = handle_uploaded_file(uploaded, website)
            context = {
                'ads': results,
                "website": website,
                "serialized_ads": serializers.serialize("json", results),
            }
            return render_to_response('confirm.html', context)

    websites = [
        Website(name="site1", url="site1.com"),
        Website(name="site2", url="site2.org"),
        Website(name="site3", url="site3.edu"),
    ]
    return render_to_response('upload.html',
                              {'form': form, 'websites': websites})
Esempio n. 18
0
    def save_info_to_database(self, soup, url):
        """Store a crawled page and its website in the database.

        Pages without an ``og:description`` or a title are skipped. A
        ``Website`` row is added for the page's domain, and the first
        website found for that domain gets a single ``Pages`` entry
        describing this URL.

        Args:
            soup: parsed document of the page.
            url: address the page was fetched from.
        """
        # NOTE(review): this looks like leftover debug output.
        if "DOCTYPE html" in soup:
            print("YES")

        try:
            description = soup.find(property="og:description")["content"]
            title = soup.title.string
        except Exception:
            print("Image link")
            return

        # Test the scheme prefix: a substring test would also flag plain
        # http URLs that merely embed an https URL (e.g. in a query).
        ssl = 1 if url.startswith("https://") else 0

        domain = self.get_page_main_url(url)

        current_time = datetime.now()

        hrefs = self.get_page_hrefs(soup)
        pages_count = len(hrefs)
        # Resolve every href once and count those that leave the domain.
        resolved = (self.parse_url(url, href) for href in hrefs)
        outgoing_hrefs = sum(1 for target in resolved if domain not in target)

        website = Website(title=title, domain=domain, pages_count=pages_count)
        self.session.add(website)
        self.session.commit()

        site = self.session.query(Website).filter(
            Website.domain == domain).first()
        site.pages = [
            Pages(date=current_time,
                  url=url,
                  title=title,
                  desc=description,
                  ads=outgoing_hrefs,
                  SSL=ssl)
        ]
        self.session.commit()
Esempio n. 19
0
def website_upload():
    """Store or refresh a scanned website and its detected technologies.

    The request body is JSON with ``hostname``, ``port``, ``title``,
    ``ipaddress``, ``geo`` and a ``technologies`` list. Each technology
    needs ``title`` and ``category``; ``url`` and ``detail`` default to
    None. A website already known by hostname and port is updated and
    its ``frequency`` incremented, otherwise a new one is created.

    Returns:
        A JSON response whose ``status`` is ``'ok'`` or names the missing
        field.
    """
    postJson = json.loads(request.data)
    app.logger.debug(postJson)
    # ``dict.has_key`` no longer exists on Python 3; ``in`` works on both.
    if 'hostname' not in postJson:
        return jsonify(status = 'missing hostname')

    technologies = []
    for t in postJson['technologies']:
        if 'title' not in t:
            return jsonify(status = 'missing technology title')
        if 'category' not in t:
            return jsonify(status = 'missing technology category')
        t.setdefault('url', None)
        t.setdefault('detail', None)

        # Reuse a technology that matches exactly on title, category
        # and detail.
        tmpTech = Technology.query.filter_by(title = t['title']).filter_by(category = t['category']).filter_by(detail = t['detail']).first()
        if tmpTech is None:
            tmpTech = Technology(category = t['category'], title = t['title'], detail = t['detail'], url = t['url'])
            db.session.add(tmpTech)

        technologies.append(tmpTech)

    upload = Website.query.filter_by(hostname = postJson['hostname'], port = postJson['port']).first()
    if not upload:
        upload = Website(hostname = postJson['hostname'], port = postJson['port'], title = postJson['title'], ipaddress = postJson['ipaddress'], geo = postJson['geo'], technologies = technologies)
    else:
        upload.last_time    = datetime.now()
        upload.title        = postJson['title']
        upload.technologies = technologies
        upload.ipaddress    = postJson['ipaddress']
        upload.geo          = postJson['geo']
        upload.frequency    = upload.frequency + 1

    db.session.add(upload)
    db.session.commit()

    return jsonify(status = 'ok')
Esempio n. 20
0
def signup(request):
    """Handle the signup form: create the user and their first website.

    On a valid POST this saves the user and an activation record, saves
    the website with a description capped at 510 characters, stores the
    favicon in three sizes and a half-size screenshot when available,
    links the user to the website and returns a confirmation response.
    Any other request, or an invalid form, renders the signup form.
    """
    if request.method == 'POST':
        form = SignupForm(request.POST)
        if form.is_valid():
            email = form.cleaned_data['email']
            fullname = form.cleaned_data['fullname']
            password = form.cleaned_data['password']
            url = str(form.cleaned_data['url']).lower()
            # creating the fresh user
            user = User(fullname=fullname, email=email, password=password, type=1)
            user.save()
            # creating activation record for the fresh user
            User_Activation(user=user).save()

            description = ''
            res = get_http_response(url)
            # saving website's description
            if res:
                description = get_site_description(res)[0:510]
                # Long descriptions are cut to 507 characters plus an
                # ellipsis, i.e. 510 characters in total.
                if len(description) >= 507:
                    description = description[0:507]+'...'

            name = extract_website_name(remove_landing_url(url))

            # insert the url to websites table in an unverified state
            # associated to the fresh user -- custom validator made sure that no
            # record with same url is verified yet
            website = Website(url=url, name=name, description=description, type=1)
            website.save()

            # saving website's icon
            favicon_url = get_site_favicon_url(remove_landing_url(url)) or ''
            if favicon_url:
                favicon_content = http_read(favicon_url) or ''
                if favicon_content:
                    filename32 = make_random_string(32)+'.png'
                    filename48 = make_random_string(32)+'.png'
                    filename64 = make_random_string(32)+'.png'
                    favicon32 = ContentFile(normalize_img((32,32),favicon_content))
                    favicon48 = ContentFile(normalize_img((48,48),favicon_content))
                    favicon64 = ContentFile(normalize_img((64,64),favicon_content))
                    website.favicon32.save(filename32, favicon32)
                    website.favicon48.save(filename48, favicon48)
                    website.favicon64.save(filename64, favicon64)

            # first site screenshot insertion
            screenshot_name = make_random_string(32)+'.png'

            screenshot_image_content = site_screenshot(url)
            if screenshot_image_content:
                # Parse the image only to learn its dimensions.
                p = ImageFile.Parser()
                p.feed(screenshot_image_content)
                im = p.close()
                # Floor division keeps the target size integral on
                # Python 3; on Python 2 the result is unchanged.
                half_size = (im.size[0] // 2, im.size[1] // 2)
                screenshot_image_content = ContentFile(normalize_img(half_size, screenshot_image_content))
                wi = Website_Image(name=name, website=website)
                wi.image.save(screenshot_name, screenshot_image_content)
            # the actual user-website association creation
            User_Website(user=user,website=website).save()
            return HttpResponse('Signup successful, please activate via email')
    # request.method is GET
    else:
        form = SignupForm() # An unbound form

    return render_form('signup_form.html', form, '', request)
Esempio n. 21
0
                bs = self.getPage(url)
            else:
                bs = self.getPage(site.url + url)
            if bs is None:
                print("page or url wrong!")
                return
            title = self.safeGet(bs, site.titleTag)
            body = self.safeGet(bs, site.bodyTag)
            if title != "" and body != "":
                content = Content(topic, title, body, url)
                content.print()


crawler = Crawler()

# One row per site; the values are passed to Website positionally, in
# this order.
siteData = [[
    "Oreilly Media", 'http://oreilly.com', 'https://ssearch.oreilly.com/?q=',
    'article.product-result', 'p.title a', True, 'h1',
    'section#product-description'
]]

# Every row holds exactly the eight positional arguments of Website.
sites = [Website(*row) for row in siteData]

# Search every configured site for each topic.
topics = ['python', 'data science']
for topic in topics:
    print("Get info about: " + topic)
    for targetSite in sites:
        crawler.search(topic, targetSite)