def main():
    """Run the zacks.com screener download, build the final report and crawl it.

    The sorting mode is read from ``sys.argv[1]``: ``ascending`` or
    ``descending`` select the matching report; any other value falls back to
    ``no-sorting``. When no argument is given, only the download path and a
    usage hint are printed and nothing else runs.
    """
    base_dir = pathlib.Path().absolute()

    # Directory handed to RunFirefox for the downloaded CSV files.
    # NOTE(review): the hard-coded backslashes and the browser/driver locations
    # below are Windows-specific - confirm before running on another OS.
    download_path = f'{base_dir}\\download\\'
    print(f'path: {download_path}')

    # URL of the screener iframe on zacks.com.
    # NOTE(review): the c_key query parameter looks like an embedded API key -
    # confirm it is meant to live in source control.
    iframe_url = (
        'https://screener-api.zacks.com/?scr_type=stock&c_id=zacks'
        '&c_key=0675466c5b74cfac34f6be7dc37d4fe6a008e212e2ef73bdcd7e9f1f9a9bd377'
        '&ecv=2ITM2QTOyQDO&ref=screening#'
    )
    # Path to the web browser executable.
    wb_path = "C:\\Program Files\\Mozilla Firefox\\firefox.exe"
    # Path to the web driver executable.
    firefox_dPath = f'{base_dir}\\drivers\\Firefox\\geckodriver.exe'

    # Guard clause: a sorting argument is mandatory.
    if len(sys.argv) < 2:
        print(
            "Please, provide one of the sorting names: ascending, descending, no-sorting as an argument."
        )
        return

    # Final report file name for every supported sorting mode.
    report_files = {
        "ascending": "all-ascending.csv",
        "descending": "all-descending.csv",
        "no-sorting": "all-no-sorting.csv",
    }

    # Anything other than an explicit ascending/descending request falls back
    # to the unsorted report.
    requested = sys.argv[1]
    if requested in ("ascending", "descending"):
        order = requested
        print(f'Sorting for final report: {order}')
    else:
        order = "no-sorting"
        print(f'Default sorting for final report: {order}')
    output_fn = report_files[order]

    # Init Firefox runner.
    run_firefox = RunFirefox(download_path, iframe_url, wb_path, firefox_dPath)

    # Init CSV reader and remove previously downloaded reports before
    # fetching new files.
    csv_reader = CSVReader("download/", "output/")
    csv_reader.clean_download()

    # Get the CSV files that hold the tickers and their values.
    s0 = 10  # time in seconds --> system time
    page_load_time = 180  # time in seconds to wait for page to load
    wait_time = 1  # explicit wait time
    driver_time = 60  # implicit wait time --> driver time
    # Run Firefox for zacks.com using the URL of the iframe.
    run_firefox.run_firefox(s0, page_load_time, wait_time, driver_time)

    # Working with CSV files: empty every report before writing the new one.
    print('Building final report for zacks.com tickets.')
    for report_name in report_files.values():
        csv_reader.empty_reports(f"output/{report_name}")
    csv_reader.empty_reports("output.html")

    print(f'Sorting final report: {order}')
    # arg: ascending, descending, no-sorting; overwrites existing file
    csv_reader.write_csv(order)
    csv_reader.clean_download()  # remove downloaded reports

    # Load the final report; per the original note it does not include the
    # header row of the CSV file.
    final_report = csv_reader.read_final_report(output_fn)
    print('Report is ready.')

    # NOTE: the Selenium WebDriver chart collectors are disabled. They were
    # invoked as
    #   singlethread(final_report, s0, page_load_time, wait_time, driver_time)
    #   multithreading(final_report, csv_reader, len(final_report), s0,
    #                  page_load_time, wait_time, driver_time)
    # with s0=2, page_load_time=180, wait_time=1, driver_time=60.

    # Using BeautifulSoup and urllib3 (single-threaded): get charts.
    pc = PageCrawler()
    pc.run(final_report)