import argparse
import os

from selenium import webdriver
from selenium.common.exceptions import WebDriverException

import scraper


def main(driver_location="./chromedriver", driver=None, has_driver=False):
    parser = argparse.ArgumentParser()
    parser.add_argument("username", help="The TikTok username", type=str)
    parser.add_argument("--tipe", help="Type: username or tag", type=str, default="username")
    parser.add_argument("--driver", help="Driver location", type=str)
    parser.add_argument("--driver-type", help="Type of driver (i.e. Chrome)", type=str)
    parser.add_argument(
        "--show-browser",
        help="Shows the browser while scraping. Useful for debugging",
        action="store_true")
    parser.add_argument(
        "--delay", type=int, default=0,
        help="Number of seconds to delay between video downloads")
    parser.add_argument("--location", help="Location to store the files")
    args = parser.parse_args()

    if not args.driver:
        if not os.path.isfile(driver_location):
            # No driver on disk: probe whether Selenium can find one on PATH,
            # otherwise fetch a matching chromedriver.
            try:
                probe = webdriver.Chrome()
                probe.quit()  # only probing for a usable driver; close it again
                has_driver = True
            except WebDriverException:
                import AutoChromedriver
                AutoChromedriver.download_chromedriver()
    else:
        driver_location = args.driver

    if not args.driver_type:
        driver = get_chromedriver(driver_location, show_browser=args.show_browser,
                                  has_driver=has_driver)
    elif args.driver_type.lower() == 'chrome':
        driver = get_chromedriver(driver_location, show_browser=args.show_browser,
                                  has_driver=has_driver)
    elif args.driver_type.lower() == 'firefox':
        driver = webdriver.Firefox()

    scraper.start(driver, args.username, tipe=args.tipe, folder=args.location,
                  delay=args.delay)
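# main() above relies on a get_chromedriver() helper that this snippet does not
# define. A minimal sketch of what it might look like, assuming Selenium's
# ChromeOptions API; the signature and the headless default are assumptions,
# not taken from the source.
def get_chromedriver(driver_location, show_browser=False, has_driver=False):
    options = webdriver.ChromeOptions()
    if not show_browser:
        # Run headless unless the user asked to watch the browser.
        options.add_argument("--headless")
    if has_driver:
        # A chromedriver was found on PATH, so no explicit binary is needed.
        return webdriver.Chrome(options=options)
    return webdriver.Chrome(executable_path=driver_location, options=options)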
def test_start(self):
    url = scraper.url
    result = scraper.start(url)
    self.assertIsNotNone(result, "Result is None. Check it!")
    self.assertIsInstance(result, list, "Result must be of list type!")
def main():
    # The input file can be changed, e.g. to 'sample_input.private'.
    with open('input.txt') as inputFile:
        scraper.start()
        sess = scraper.getSession()
        for line in inputFile:
            username = line.strip()
            # Scrape only score and percentile:
            # print(str(scraper.scrape(sess, username)).strip())
            # Full scrape:
            print(str(scraper.completeScrape(sess, username, debug=False)).strip())
from PyQt5.QtWidgets import QTableWidgetItem  # assumed Qt binding; not confirmed by the source


def scrap(self):
    keyCur = self.le_nc.text()
    print(keyCur)
    keyData = scraper.start(keyCur)
    rst.tableWidget.setRowCount(0)
    for row in keyData:
        # Insert a new row at the end of the table and fill its columns.
        rowPosition = rst.tableWidget.rowCount()
        rst.tableWidget.insertRow(rowPosition)
        rst.tableWidget.setItem(rowPosition, 0, QTableWidgetItem(str(row['title'])))
        rst.tableWidget.setItem(rowPosition, 1, QTableWidgetItem(str(row['url'])))
        rst.tableWidget.setItem(rowPosition, 2, QTableWidgetItem(str(row['date'])))
    rst.exec()
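# Illustrative only: scrap() above implies that scraper.start() returns a list
# of dicts keyed by 'title', 'url' and 'date' (inferred from the table columns,
# not confirmed by the source). A minimal example of that shape:
keyData_example = [
    {"title": "Example result", "url": "https://example.com/1", "date": "2021-01-01"},
]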
def execute(sc):
    print("====STARTED SCRAPING====")
    scraper.start()
    # Re-arm this job on the scheduler that invoked it (the original called
    # schedule.enter here, which assumed a module-level scheduler instance).
    sc.enter(TIME, 1, execute, (sc,))
    print("DONE SCRAPING")
    print("WAITING FOR NEXT SCRAPING")
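# A minimal sketch of how execute() might be primed, assuming the standard
# library's sched module; TIME and the scheduler name are illustrative.
import sched
import time

TIME = 3600  # seconds between scraping runs
scheduler = sched.scheduler(time.time, time.sleep)
scheduler.enter(0, 1, execute, (scheduler,))
scheduler.run()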
def test_start_bad_url(self):
    url = 123
    with self.assertRaises(TypeError):
        scraper.start(url)
import json
import logging.config
from os import path


def read_config():
    try:
        with open(path.join(path.dirname(__file__), 'config.json')) as f:
            return json.load(f)
    except IOError:
        print('Could not open config.json')
        raise SystemExit


if __name__ == '__main__':
    config = read_config()
    logging.config.dictConfig(config.get('logging'))
    from scraper import start
    start(config=config)
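# A minimal sketch of the config.json that read_config() expects, assuming the
# standard dictConfig schema for the 'logging' key; handler and level choices
# here are illustrative, not taken from the source.
#
# {
#     "logging": {
#         "version": 1,
#         "handlers": {
#             "console": {"class": "logging.StreamHandler", "level": "INFO"}
#         },
#         "root": {"handlers": ["console"], "level": "INFO"}
#     }
# }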