def run_the_scrapers(nofrills=None, metro=None):
    # Set up application
    # ==========================================================================================
    application = Flask(__name__)
    application.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
    application.config['SQLALCHEMY_DATABASE_URI'] = 'mysql://root:@localhost/TheSeedSA'
    # application.config['SQLALCHEMY_DATABASE_URI'] = 'mysql://root:@localhost/TheSeed'
    db = flask_sqlalchemy.SQLAlchemy(application)

    # Current time and day of week, used to match scheduled scraper jobs.
    timestamp = time.strftime('%H:%M')
    dow = list(calendar.day_abbr).index(time.strftime('%a'))

    if nofrills is not None and metro is not None:
        # Explicit flags were passed in: run only the requested scrapers.
        scr = []
        if nofrills == "true":
            scr.append('NoFrills')
        if metro == "true":
            scr.append('Metro')
        s = scraper.Scraper(scrapers=scr)
    else:
        # No flags given: look up which scrapers are scheduled for this day and time.
        jobs = ScraperSettings.query.filter_by(dayofweek=dow, time=timestamp).all()
        arr = []
        for j in jobs:
            if j.nofrills_enabled == 1:
                arr.append("NoFrills")
            if j.metro_enabled == 1:
                arr.append("Metro")
        s = scraper.Scraper(scrapers=arr)

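# The scheduled branch above assumes a ScraperSettings model roughly like the
# sketch below. This is NOT the project's actual definition: the column names
# come from the query and attribute accesses in run_the_scrapers, but the
# types and table layout are assumptions.
class ScraperSettings(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    dayofweek = db.Column(db.Integer)         # 0 = Mon .. 6 = Sun, as in calendar.day_abbr
    time = db.Column(db.String(5))            # 'HH:MM', matching time.strftime('%H:%M')
    nofrills_enabled = db.Column(db.Integer)  # 1 when the NoFrills scraper should run
    metro_enabled = db.Column(db.Integer)     # 1 when the Metro scraper should run
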
def _get_product_details(source, url, sku):
    """
    Scrape product metadata.

    :param source: name of the store/site being scraped
    :param url: canonical product url
    :param sku: product SKU used to key the parse
    :return: parsed details (including product name and review count),
        or a falsy value if parsing failed
    """
    sc = scraper.Scraper(source=source)
    response = sc.get_request(url)
    pr = parser.Parser(sku=sku, source=source)
    res = pr.parse(response, init=True)

    if res:
        # Save the parsed details to the database.
        db_details = DB.init_db(config.get("details_db"))
        db_details = db_details.product_details
        record = {
            "status": "processing",
            "url": url,
            "product_name": res.get("product_name"),
            "review_count": res.get("review_count"),
            "review_page_count": res.get("page_count"),
            "source": source,
            "sku": sku,
            "img": res.get("img_url"),
            "timestamp": time.time(),
        }
        db_details.insert_one(record)
        logger.info("Saved new product details:")
        logger.info(record)
    return res

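# Hypothetical usage of _get_product_details; the source name, URL, and SKU
# below are illustrative only and do not come from the project.
details = _get_product_details(
    source="example_store",
    url="https://www.example.com/product/12345",
    sku="12345",
)
if details:
    print(details.get("product_name"), details.get("review_count"))
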
def multi_user_poster(self, profiles):
    for profile in profiles:
        try:
            user_id = scraper.bot.get_user_id_from_username(profile)
            # Note: despite the variable name in the original, get_user_following
            # returns the accounts this profile follows, not its followers.
            following = scraper.bot.get_user_following(user_id)
            print(following)
            for followed_id in following:
                print(followed_id)
                follow_count = int(
                    scraper.Scraper().get_ig_followers_count(followed_id))
                time.sleep(2)  # throttle requests to avoid rate limiting
                if follow_count > 500:
                    print("follower count is over 500: " + str(follow_count))
                    user = scraper.bot.get_username_from_user_id(followed_id)
                    self.start(user)
                else:
                    print("follower count is not over 500: " + str(follow_count))
        except Exception as e:
            print(e)

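# A small refactor sketch for the threshold check above: the original printed
# "10.000" and "5000" against a comparison of 500, so keeping the cutoff in
# one named constant stops the messages and the check from drifting apart.
# MIN_FOLLOWER_COUNT and is_popular are names introduced here, not from the project.
MIN_FOLLOWER_COUNT = 500

def is_popular(follow_count, threshold=MIN_FOLLOWER_COUNT):
    """Return True when an account's follower count clears the threshold."""
    return follow_count > threshold
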
def test_get_reviews_no_items(self):
    """
    Asserts a scraper with no item data does not create any garbage entries
    """
    test_scraper = scraper.Scraper([])
    result = test_scraper.get_reviews()
    self.assertEqual(len(result), 0)

def test_get_reviews(self, mock_build):
    """
    Asserts get_reviews builds the correct data structure
    """
    item1 = parser.ReviewItem("me", "this is the first review", 2,
                              date.today(), False, ["sandwich"], True)
    item2 = parser.ReviewItem("you", "this is the second review", 4,
                              date.today() - timedelta(1), True,
                              ["chicken", "onion rings"], False)
    mock_build.side_effect = [item1, item2]
    test_scraper = scraper.Scraper([1, 2])
    result = test_scraper.get_reviews()
    self.assertEqual(len(result), 2)
    self.assertEqual(result[0], item1.__dict__)
    self.assertEqual(result[1], item2.__dict__)

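# The extra mock_build parameter implies this test carries a mock.patch
# decorator that the snippet does not show. It would look roughly like the
# following; the patch target here is a guess based on the mock's name.
# @mock.patch("scraper.Scraper._build_review_item")
# def test_get_reviews(self, mock_build):
#     ...
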
""" Generate a database from param inputs Copyright 2020 - Steffan Jensen Do NOT remove this copyright Contact: Github: http://github.com/steffanjensen """ import sys import os import MySQLdb sys.path.append(os.path.join(sys.path[0], "./webcreator/")) from scraper import scraper from config import db_host, db_password, db_username, db_name webscraper = scraper.Scraper() class Database(object): def __init__(self): # Test if it works self.host = db_host self.username = db_username self.passwd = db_password self.db_name = db_name def connect_to_db(self): db = MySQLdb.connect(host=self.host, user=self.user, passwd=self.passwd, db=self.db) return db
def webscrape(self, value):
    # Read the credentials from the form fields and hand them to the scraper.
    username = self.username_input.text
    password = self.password_input.text
    scrape = scraper.Scraper(username=username, password=password)
    scrape.scrape()

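# The .text attributes suggest webscrape is a Kivy event handler. Below is a
# minimal, assumed wiring for it; the widget class and layout are not from the
# original project. `value` receives the Button instance that fired on_release.
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.button import Button
from kivy.uix.textinput import TextInput


class ScrapeForm(BoxLayout):
    webscrape = webscrape  # reuse the handler defined above

    def __init__(self, **kwargs):
        super().__init__(orientation="vertical", **kwargs)
        self.username_input = TextInput(hint_text="username")
        self.password_input = TextInput(hint_text="password", password=True)
        run_button = Button(text="Scrape")
        run_button.bind(on_release=self.webscrape)
        for widget in (self.username_input, self.password_input, run_button):
            self.add_widget(widget)
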
def setup_method(self, function):
    # Build a scraper backed by canned responses instead of live HTTP.
    self.requests = MockRequests(handbook_url)
    self.scraper = scraper.Scraper(self.requests.get_webpage)

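# Sketch of the MockRequests double assumed by setup_method above. The real
# fixture is not shown in the snippet; this version just returns canned HTML
# regardless of URL, and the fixture path is hypothetical.
class MockRequests:
    def __init__(self, url):
        self.url = url

    def get_webpage(self, url=None):
        # Serve a saved copy of the page instead of hitting the network.
        with open("tests/fixtures/handbook.html", encoding="utf-8") as f:
            return f.read()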