Example #1
import calendar
import time

import flask_sqlalchemy
from flask import Flask

# `scraper` and the `ScraperSettings` model are project-local and assumed
# to be in scope here.
def run_the_scrapers(nofrills=None, metro=None):

    # Set up application
    # ==========================================================================================
    application = Flask(__name__)
    application.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
    application.config[
        'SQLALCHEMY_DATABASE_URI'] = 'mysql://root:@localhost/TheSeedSA'
    #application.config['SQLALCHEMY_DATABASE_URI'] = 'mysql://root:@localhost/TheSeed'

    db = flask_sqlalchemy.SQLAlchemy(application)

    timestamp = time.strftime('%H:%M')
    dow = list(calendar.day_abbr).index(time.strftime('%a'))

    if nofrills is not None and metro is not None:
        scr = []
        if nofrills == "true":
            scr.add('NoFrills')
        if metro == "true":
            scr.add('Metro')

        s = scraper.Scraper(scrapers=scr)
    else:
        jobs = ScraperSettings.query.filter_by(dayofweek=dow,
                                               time=timestamp).all()

        arr = []
        for j in jobs:
            if j.nofrills_enabled == 1:
                arr.append("NoFrills")
            if j.metro_enabled == 1:
                arr.append("Metro")
        s = scraper.Scraper(scrapers=arr)
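
A minimal usage sketch, assuming run_the_scrapers is importable from this
module; the "true" string flags mirror the checks above, and calling with no
arguments falls back to the ScraperSettings schedule lookup:

# Hypothetical caller: run both store scrapers immediately.
run_the_scrapers(nofrills="true", metro="true")

# Hypothetical scheduled run: let the ScraperSettings table decide which
# scrapers are due at the current day of week and HH:MM timestamp.
run_the_scrapers()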
Example #2
def _get_product_details(source, url, sku):
    """
    Scrape product metadata.

    :param url: canonical product url
    :return number of reviews and product name
    """
    sc = scraper.Scraper(source=source)
    response = sc.get_request(url)
    pr = parser.Parser(sku=sku, source=source)
    res = pr.parse(response, init=True)
    if res:
        # Save it to the database
        db_conn = DB.init_db(config.get("details_db"))
        db_details = db_conn.product_details
        record = {
            "status": "processing",
            "url": url,
            "product_name": res.get("product_name"),
            "review_count": res.get("review_count"),
            "review_page_count": res.get("page_count"),
            "source": source,
            "sku": sku,
            "img": res.get("img_url"),
            "timestamp": time.time(),
        }
        db_details.insert_one(record)
        logger.info("Saved new product details: ")
        logger.info(record)
    return res
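
A hedged usage sketch; the source string, URL, and SKU below are placeholders,
not values from the original project:

# Hypothetical call: fetch, parse, and persist details for one product.
details = _get_product_details(
    source="examplestore",                  # assumed source identifier
    url="https://example.com/product/123",  # placeholder URL
    sku="ABC-123",                          # placeholder SKU
)
if details:
    print(details.get("product_name"), details.get("review_count"))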
Example #3
 def multi_user_poster(self, profiles):
     for profile in profiles:
         try:
             user_id = scraper.bot.get_user_id_from_username(profile)
             followers = scraper.bot.get_user_following(user_id)
             for follower in followers:
                 print(follower)  # progress trace
                 follow_count = int(
                     scraper.Scraper().get_ig_followers_count(follower))
                 time.sleep(2)  # throttle between profile lookups
                 if follow_count > 500:
                     print("follower count is over 500: " + str(follow_count))
                     user = scraper.bot.get_username_from_user_id(follower)
                     self.start(user)
                 else:
                     print("follower count is at most 500: " + str(follow_count))
         except Exception as e:
             print(e)
Example #4
 def test_get_reviews_no_items(self):
     """
         Asserts a scraper with no item data does not create
         any garbage entries
     """
     test_scraper = scraper.Scraper([])
     result = test_scraper.get_reviews()
     self.assertEqual(len(result), 0)
Example #5
 def test_get_reviews(self, mock_build):
     """
         Asserts get_reviews builds the correct data structure
     """
     item1 = parser.ReviewItem("me", "this is the first review", 2, date.today(), False, ["sandwich"], True)
     item2 = parser.ReviewItem("you", "this is the second review", 4, date.today() - timedelta(1), True, ["chicken", "onion rings"], False)
     mock_build.side_effect = [item1, item2]
     test_scraper = scraper.Scraper([1, 2])
     result = test_scraper.get_reviews()
     self.assertEqual(len(result), 2)
     self.assertEqual(result[0], item1.__dict__)
     self.assertEqual(result[1], item2.__dict__)
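
The mock_build parameter implies the test method carries a mock.patch
decorator that was not captured above. A minimal sketch of that wiring,
assuming the scraper builds review items through a parser helper (the patch
target path is an assumption):

import unittest
from unittest import mock


class ReviewTests(unittest.TestCase):
    # Hypothetical patch target: wherever Scraper looks up its item builder.
    @mock.patch("scraper.parser.build_review_item")
    def test_get_reviews(self, mock_build):
        # side_effect queues one return value per call, so the first build
        # yields item1 and the second yields item2, as in the test above.
        ...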
"""
Generate a database from param inputs

Copyright 2020 - Steffan Jensen
Do NOT remove this copyright
Contact:
    Github: http://github.com/steffanjensen
"""
import sys
import os
import MySQLdb
sys.path.append(os.path.join(sys.path[0], "./webcreator/"))
from scraper import scraper
from config import db_host, db_password, db_username, db_name
webscraper = scraper.Scraper()


class Database(object):
    def __init__(self):
        # Connection credentials are read from the config module
        self.host = db_host
        self.username = db_username
        self.passwd = db_password
        self.db_name = db_name

    def connect_to_db(self):
        # Note: attribute names must match those assigned in __init__.
        db = MySQLdb.connect(host=self.host,
                             user=self.username,
                             passwd=self.passwd,
                             db=self.db_name)
        return db
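
A brief usage sketch, assuming the config module supplies valid MySQL
credentials; the query is a placeholder:

# Hypothetical usage: open a connection and run a throwaway query.
database = Database()
connection = database.connect_to_db()
cursor = connection.cursor()
cursor.execute("SELECT 1")  # placeholder query
print(cursor.fetchone())
connection.close()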
Example #7
 def webscrape(self, value):
     # `value` is the widget instance passed in by the UI event binding;
     # it is not used here.
     username = self.username_input.text
     password = self.password_input.text
     scrape = scraper.Scraper(username=username, password=password)
     scrape.scrape()
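
The .text attributes and the (self, value) handler signature suggest a Kivy
UI, which is an assumption; if so, the handler could be bound to a button
like this (widget names are hypothetical):

# Hypothetical Kivy wiring: on_press passes the pressed widget as the
# second argument, which arrives as `value` in webscrape above.
from kivy.uix.button import Button

scrape_button = Button(text="Scrape")
scrape_button.bind(on_press=app.webscrape)  # `app` is the object defining webscrape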
Example #8
 def setup_method(self, function):
     self.requests = MockRequests(handbook_url)
     self.scraper = scraper.Scraper(self.requests.get_webpage)
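
setup_method depends on a MockRequests helper that is not shown. A minimal
sketch of what it might look like, assuming it serves a canned page in place
of real HTTP requests (the class shape and fixture HTML are assumptions):

# Hypothetical stand-in for the test double used above.
class MockRequests:
    def __init__(self, url):
        self.url = url

    def get_webpage(self, url):
        # Return canned HTML instead of hitting the network.
        assert url == self.url
        return "<html><body>handbook fixture</body></html>"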