Example #1
import argparse
import os

from selenium import webdriver
from selenium.common.exceptions import WebDriverException

import scraper
# get_chromedriver and AutoChromedriver are helpers defined elsewhere in this project


def main(driver_location="./chromedriver", driver=None, has_driver=False):
    parser = argparse.ArgumentParser()
    parser.add_argument("username", help="The TikTok username", type=str)
    parser.add_argument("--tipe",
                        help="type: username or tag",
                        type=str,
                        default="username")
    parser.add_argument("--driver", help="Driver location", type=str)
    parser.add_argument("--driver-type",
                        help="Type of driver (e.g. Chrome)",
                        type=str)
    parser.add_argument(
        "--show-browser",
        help="Shows browser while scraping. Useful for debugging",
        action="store_true")
    parser.add_argument(
        "--delay",
        type=int,
        help="Number of seconds to delay between video downloads",
        default=0)
    parser.add_argument("--location", help="Location to store the files")

    args = parser.parse_args()

    if not args.driver:
        if not os.path.isfile(driver_location):
            try:
                # Probe whether a chromedriver is already on the PATH.
                probe = webdriver.Chrome()
                probe.quit()
                has_driver = True
            except WebDriverException:
                # No usable chromedriver found; fetch one automatically.
                import AutoChromedriver
                AutoChromedriver.download_chromedriver()
    else:
        driver_location = args.driver

    if not args.driver_type or args.driver_type.lower() == 'chrome':
        driver = get_chromedriver(driver_location,
                                  show_browser=args.show_browser,
                                  has_driver=has_driver)
    elif args.driver_type.lower() == 'firefox':
        driver = webdriver.Firefox()
    else:
        parser.error("Unsupported --driver-type: %s" % args.driver_type)

    scraper.start(driver,
                  args.username,
                  tipe=args.tipe,
                  folder=args.location,
                  delay=args.delay)
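
Assuming the example above is saved as main.py (a hypothetical file name), its argparse definitions imply an invocation along these lines:

python main.py some_username --tipe username --show-browser --delay 2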
Example #2
    def test_start(self):
        url = scraper.url

        result = scraper.start(url)

        self.assertIsNotNone(result, "scraper.start() returned None")
        self.assertIsInstance(result, list, "result must be a list")
Example #3
def main():
    inputFile = open('input.txt')

    # alternatively, read usernames from the private sample file:
    #inputFile = open('sample_input.private')

    scraper.start()

    sess = scraper.getSession()

    for line in inputFile:
        # get the username for this line
        username = line.strip()

        # Scrape only score and percentile
        #print(str(scraper.scrape(sess, username)).strip())

        # Scrape everything
        print(str(scraper.completeScrape(sess, username, debug=False)).strip())

    inputFile.close()
Example #4
    def scrap(self):
        keyCur = self.le_nc.text()  # search key from the line-edit widget
        print(keyCur)
        keyData = scraper.start(keyCur)
        rst.tableWidget.setRowCount(0)  # clear any previous results

        for entry in keyData:
            rowPosition = rst.tableWidget.rowCount()
            rst.tableWidget.insertRow(rowPosition)  # append a row at the end
            rst.tableWidget.setItem(rowPosition, 0,
                                    QTableWidgetItem(str(entry['title'])))
            rst.tableWidget.setItem(rowPosition, 1,
                                    QTableWidgetItem(str(entry['url'])))
            rst.tableWidget.setItem(rowPosition, 2,
                                    QTableWidgetItem(str(entry['date'])))

        rst.exec()  # show the results dialog
Example #5
def execute(sc):
    print("====STARTED SCRAPING====")
    scraper.start()
    # re-enter this function so it runs again after TIME seconds
    schedule.enter(TIME, 1, execute, (sc,))
    print("DONE SCRAPING")
    print("WAITING FOR NEXT SCRAPING")
Example #6
    def test_start_bad_url(self):
        url = 123  # not a string, so a TypeError is expected

        with self.assertRaises(TypeError):
            scraper.start(url)
Example #7
import json
import logging.config
from os import path


def read_config():
    try:
        with open(path.join(path.dirname(__file__), 'config.json')) as f:
            config = json.load(f)
            return config
    except IOError:
        print('Could not open config.json')
        raise SystemExit


if __name__ == '__main__':
    config = read_config()
    logging.config.dictConfig(config.get('logging'))
    from scraper import start
    start(config=config)
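
For reference, a minimal config.json that this entry point could load might look like the following. The "logging" key follows the standard logging.config.dictConfig schema; the handler and level chosen here are assumptions, and any other keys would be passed through to scraper.start via config:

{
    "logging": {
        "version": 1,
        "handlers": {
            "console": {"class": "logging.StreamHandler"}
        },
        "root": {"level": "INFO", "handlers": ["console"]}
    }
}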