Beispiel #1
0
 def generate_post_data(assigned_url, new_or_update):
     """Scrape Kijiji listing pages, starting at ``assigned_url``, into Posts.

     Every ad found on a listing page is parsed and then either updated
     (when ``Post.from_mongo_post_id`` finds it) or created and saved with
     ``save_to_mongo``.  The "Next" link of each listing page is followed
     until a page has none.

     Relies on names from the enclosing scope: ``base`` (prepended to every
     relative href), ``self`` (its ``_id`` is stored as ``pull_id``),
     ``Post``, ``requests`` and ``BeautifulSoup``.

     NOTE(review): the previous version appended to ``new_posts`` and
     ``price_drops``, names it never defined, and returned nothing when a
     next page existed.  Results are now collected locally and always
     returned -- confirm no caller relies on outer lists being mutated.

     Args:
         assigned_url: URL of the first listing page to scrape.
         new_or_update: Posts are collected into the returned lists only
             when this equals ``'update'``.

     Returns:
         A ``(new_posts, price_drops)`` tuple of lists.  ``new_posts``
         holds the posts created during this call, ``price_drops`` the
         existing posts whose first scraped price differs from the first
         stored price.
     """
     collected_new_posts, collected_price_drops = [], []
     visited_urls = set()
     page_url = assigned_url
     # Walk the pages in a loop rather than recursing; the visited set
     # stops the walk if a "Next" link ever points back to a seen page.
     while page_url is not None and page_url not in visited_urls:
         visited_urls.add(page_url)
         print(page_url)
         # Kept under its own name so the "Next" lookup below always runs
         # against the listing page.  The previous version re-fetched each
         # ad's URL, never used the result, and clobbered this soup with it;
         # that unused request has been dropped.
         listing_soup = BeautifulSoup(
             requests.get(page_url).content, "html.parser")
         for link in listing_soup.findAll("div", {"class": "clearfix"}):
             url_link = link.find(
                 "a", {"class": "title enable-search-navigation-flag "})
             if url_link is None:
                 # Not an ad container.
                 continue
             _id_html = link.find(
                 "div",
                 {"class": "watch watchlist-star p-vap-lnk-actn-addwtch"})
             url = base + url_link.get('href')
             title = url_link.text.lstrip()
             if "Wanted:" in title:
                 continue
             image = link.find("img", {"alt": title}).get('src')
             prices = [
                 link.find("div", {"class": "price"})
                 .text.lstrip().split("\n")[0]
             ]
             date_posted = link.find(
                 "span", {"class": "date-posted"}).text.strip()
             # The location text also contains the posting date; strip it.
             location = link.find(
                 "div", {"class": "location"}).text.strip().replace(
                     date_posted, "")
             if link.find("div", {"class": "dealer-logo-image"}) is None:
                 seller = "Owner"
             else:
                 seller = "Dealer"
             # The details text is "<transmission> | <kms>"; kms is optional.
             details = link.find(
                 "div", {"class": "details"}).text.split("|")
             transmission = details[0].strip()
             kms = details[1].strip() if len(details) > 1 else "N/A"
             description = link.find(
                 "div", {"class": "description"}).text.replace(
                     transmission + " | " + kms, "").strip()
             _id = _id_html.get('data-adid').strip()
             try:
                 post = Post.from_mongo_post_id(_id)
                 if prices[0] != post.prices[0]:
                     print(prices[0])
                     print(post.prices[0])
                     # Newest price first, followed by the stored history.
                     prices = prices + post.prices
                     print(prices)
                     print(type(prices))
                     if new_or_update == 'update':
                         collected_price_drops.append(post)
                 post.update_post({
                     "_id": _id,
                     "type": "kijiji",
                     "location": location,
                     "kms": kms,
                     "image": image,
                     "title": title,
                     "date_posted": date_posted,
                     "star": post.star,
                     "hide": post.hide,
                     "seller": seller,
                     "prices": prices,
                     "transmission": transmission,
                     "description": description,
                     "url": url,
                     "pull_id": self._id
                 })
             # A TypeError here is treated as "post does not exist yet":
             # create it.
             except TypeError:
                 post = Post(_id=_id,
                             type="kijiji",
                             location=location,
                             kms=kms,
                             image=image,
                             title=title,
                             date_posted=date_posted,
                             seller=seller,
                             prices=prices,
                             transmission=transmission,
                             description=description,
                             url=url,
                             pull_id=self._id)
                 post.save_to_mongo()
                 if new_or_update == 'update':
                     collected_new_posts.append(post)
         next_link = listing_soup.find("a", {"title": "Next"})
         next_href = next_link.get('href') if next_link is not None else None
         page_url = base + next_href if next_href else None
     return collected_new_posts, collected_price_drops
Beispiel #2
0
 def generate_autotrader_posts_data(self, new_or_update):
     """Scrape the autotrader results at ``self.autotrader_url`` into Posts.

     The page is fetched once to read the result count, then fetched again
     with the ``rcp`` query parameter set to that count plus one.  Each
     result is parsed and either updated (when ``Post.from_mongo_post_id``
     finds it) or created and saved with ``save_to_mongo``.  Afterwards the
     collected new posts and price drops are passed to
     ``Emailer.send_email``; this happens on a non-200 response too, with
     empty lists.

     Args:
         new_or_update: Posts are collected for the e-mails only when this
             equals ``'update'``.
     """
     new_posts, price_drops = [], []
     session = Scraping.generate_session()
     request = session.get(self.autotrader_url)
     print(self.autotrader_url)
     if request.status_code != 200:
         print(
             "generate_autotrader_posts_data() has returned a non-200 response code."
         )
         print("Status code: {}".format(request.status_code))
         # get_dict must be called; formatting the bare attribute printed
         # the bound-method repr instead of the cookies.
         print("Cookies: {}".format(session.cookies.get_dict()))
         print("Url attempted: {}".format(self.autotrader_url))
     else:
         soup = BeautifulSoup(request.content, "html.parser")
         results_count = int(
             soup.find("span", {
                 "class": "at-results-count pull-left"
             }).text) + 1
         # Repeat the request with "rcp" set to the reported result count
         # plus one.
         session.params = {"rcp": results_count}
         request = session.get(self.autotrader_url)
         soup = BeautifulSoup(request.content, "html.parser")
         posts_soup = soup.findAll(
             "div", {
                 "class":
                 lambda L: L and L.startswith('col-xs-12 result-item-inner')
             })
         for listing in posts_soup:
             url_html = listing.find("a", {"class": "main-photo click"})
             url = url_html.get('href')
             image = url_html.find("img").get("data-original")
             title = listing.find("a", {
                 "class": "result-title click"
             }).text.strip()
             kms_html = listing.find("div", {"class": "kms"})
             kms = kms_html.text.strip() if kms_html is not None else "--"
             # Keep only the first line of the text before the first "...".
             description = listing.find("p", {
                 "itemprop": "description"
             }).text.split('...')[0].strip().split("\n")[0] + "..."
             path = urllib.parse.urlparse(url).path
             path_parts = unidecode.unidecode(
                 urllib.parse.unquote(path)).split("/")
             # Path segments 4 and 5 are combined into the location; the
             # id is segment 6 of the raw href.
             location = "{}, {}".format(path_parts[4].title(),
                                        path_parts[5].title())
             _id = url.split("/")[6]
             url = "http://www.autotrader.ca" + url
             prices = [listing.find("span", {"class": "price-amount"}).text]
             date_posted = "--"
             transmission = "--"
             # NOTE(review): the tag name and the attribute dict are passed
             # to findAll together as ONE tuple argument, not as separate
             # name/attrs arguments -- confirm this is the intended filter.
             # Left unchanged because it decides Owner vs. Dealer.
             seller_html = listing.find("div", {
                 "class": "seller-logo-container"
             }).findAll(("img", {
                 "id": "imgDealerLogo",
                 "src": "/Images/Shared/blank.png",
                 "alt": lambda L: L and L.startswith('')
             }))
             seller = "Dealer" if seller_html else "Owner"
             # Try to find the post in the database by _id
             try:
                 post = Post.from_mongo_post_id(_id)
                 if prices[0] != post.prices[0]:
                     print(prices[0])
                     print(post.prices[0])
                     # Newest price first, followed by the stored history.
                     prices = prices + post.prices
                     print(prices)
                     print(type(prices))
                     if new_or_update == 'update':
                         price_drops.append(post)
                 post.update_post({
                     "_id": _id,
                     "type": "autotrader",
                     "location": location,
                     "kms": kms,
                     "image": image,
                     "title": title,
                     "date_posted": date_posted,
                     "star": post.star,
                     "hide": post.hide,
                     "seller": seller,
                     "prices": prices,
                     "transmission": transmission,
                     "description": description,
                     "url": url,
                     "pull_id": self._id
                 })
             # If it doesn't exist, create it
             except TypeError:
                 print("creating post... {}".format(
                     urllib.parse.urlparse(title)))
                 post = Post(_id=_id,
                             type="autotrader",
                             location=location,
                             kms=kms,
                             image=image,
                             title=title,
                             date_posted=date_posted,
                             seller=seller,
                             prices=prices,
                             transmission=transmission,
                             description=description,
                             url=url,
                             pull_id=self._id)
                 post.save_to_mongo()
                 if new_or_update == 'update':
                     new_posts.append(post)
     print("price_drops: {}".format(price_drops))
     print("new posts: {}".format(new_posts))
     Emailer.send_email(self.author_id, new_posts, passed_msg='new_post')
     Emailer.send_email(self.author_id,
                        price_drops,
                        passed_msg='price_drop')
Beispiel #3
0
def change_flags(variable, flag, post_id, template):
    """Set the ``star`` or ``hide`` flag of a post and re-render a view.

    Args:
        variable: ``'star'`` updates the star flag; any other value
            updates the hide flag.
        flag: The string ``'True'`` sets the flag to ``True``; every other
            value sets it to ``False``.
        post_id: Id handed to ``Post.from_mongo_post_id`` to load the post.
        template: Name of the template the request came from; selects
            which view is rendered in response.

    Returns:
        The response for the requested template.  The previous version
        built the response for ``'starred_posts.html'`` and
        ``'price_drops.html'`` but never returned it, so those two
        branches returned ``None``.
    """
    post = Post.from_mongo_post_id(post_id)
    flag = flag == 'True'

    # Re-submit every stored field unchanged, then override the one flag.
    updated_fields = {
        "_id": post._id,
        "location": post.location,
        "kms": post.kms,
        "image": post.image,
        "title": post.title,
        "date_posted": post.date_posted,
        "seller": post.seller,
        "prices": post.prices,
        "transmission": post.transmission,
        "description": post.description,
        "url": post.url,
        "pull_id": post.pull_id,
        "hide": post.hide,
        "star": post.star,
        "type": post.type,
    }
    updated_fields["star" if variable == 'star' else "hide"] = flag
    post.update_post(updated_fields)

    # Lookups are performed only for the templates that use their results;
    # the previous version also ran queries whose results were discarded.
    if template == 'starred_posts.html':
        return make_response(starred_posts())
    if template == 'price_drops.html':
        return make_response(price_drops())
    if template == 'all_posts.html':
        return make_response(get_all_posts())

    pull = Pull.from_mongo(post.pull_id)
    if template == 'posts.html':
        return make_response(load_posts(pull._id))
    posts = Post.from_mongo(pull._id)
    return render_template(template, pull=pull, posts=posts)