def main():
    """Parse the command-line arguments and run the program.

    An ``ICheckMoviesMisconfigurationException`` raised by ``execute`` is
    reported through ``command_line.error`` and the process then exits with
    status 1.
    """
    cli_args = parse_args()
    try:
        execute(cli_args)
    except ICheckMoviesMisconfigurationException as misconfiguration:
        command_line.error(str(misconfiguration))
        sys.exit(1)
def execute(args):
    """Run the parsing step followed by the inserting step.

    The parser is built by looking up ``args.source`` with
    ``get_parser_from_arg`` and calling the result with ``args``.

    Any ``RatSException`` raised along the way is reported through
    ``command_line.error``; it is not re-raised.
    """
    try:
        parser_factory = get_parser_from_arg(args.source)
        parser = parser_factory(args)
        parsed_movies = execute_parsing(args, parser)
        execute_inserting(args, parsed_movies, parser)
    except RatSException as rats_error:
        command_line.error(str(rats_error))
def _handle_captcha_challenge_if_present(self):
    """Stop the program if the login page presents a captcha challenge.

    Looks for ``div`` elements with the id ``auth-captcha-image-container``.
    When at least one is found, the failed login is reported, the browser is
    killed through ``self.browser_handler`` and the process exits with
    status 1. Otherwise the method returns without doing anything.
    """
    captcha_containers = self.browser.find_elements_by_xpath("//div[@id='auth-captcha-image-container']")
    if len(captcha_containers) == 0:
        return

    command_line.error("Login to {site_name} failed.".format(site_name=self.site_name))
    sys.stdout.write("There seems to be a Captcha challenge present for the login. Please try again later.\r\n")
    sys.stdout.flush()
    self.browser_handler.kill()
    sys.exit(1)
def main():
    """Parse the command-line arguments and run the program.

    A ``RatSException`` escaping from ``execute`` is reported through
    ``command_line.error`` and the process then exits with status 1.
    """
    cli_args = parse_args()
    try:
        execute(cli_args)
    except RatSException as rats_error:
        command_line.error(str(rats_error))
        sys.exit(1)
def _handle_login_unsuccessful(self):
    """Stop the program if the user is still not logged in.

    Waits one second, then asks ``self._user_is_not_logged_in()``. If that
    reports a missing login, the failure and a hint about ``credentials.cfg``
    are printed, the browser is killed and the process exits with status 1.
    """
    time.sleep(1)
    if not self._user_is_not_logged_in():
        return

    command_line.error("Login to {site_name} failed.".format(site_name=self.site_name))
    sys.stdout.write("Please check if the credentials are correctly set in your credentials.cfg\r\n")
    sys.stdout.flush()
    self.browser_handler.kill()
    sys.exit(1)
def _check_login_successful(self):
    """Stop the program if the login form is still being displayed.

    The login counts as failed when the login button, the username field and
    the password field can all still be found on the current page. In that
    case the failure and a hint about ``credentials.cfg`` are printed, the
    browser is killed and the process exits with status 1.
    """
    login_form_selectors = (
        self.LOGIN_BUTTON_SELECTOR,
        self.LOGIN_USERNAME_SELECTOR,
        self.LOGIN_PASSWORD_SELECTOR,
    )
    # all() over a generator stops at the first selector that matches nothing,
    # so no further page lookups are made once the outcome is known.
    login_form_still_shown = all(
        len(self.browser.find_elements_by_xpath(selector)) > 0
        for selector in login_form_selectors
    )
    if not login_form_still_shown:
        return

    command_line.error("Login to %s failed." % self.site_name)
    sys.stdout.write("Please check if the credentials are correctly set in your credentials.cfg\r\n")
    sys.stdout.flush()
    self.kill_browser()
    sys.exit(1)
def execute_inserting(args, movies, parser):
    """Insert the parsed movies into every destination named in ``args``.

    Does nothing when ``args.destination`` is empty or unset. When
    destinations were requested but ``movies`` is empty, an error is printed
    and the process exits with status 1.

    Each inserter is given the class name of the parser's site as the name of
    the source.
    """
    if not args.destination:
        return

    if len(movies) == 0:
        command_line.error("There are no files to be inserted. Did the Parser run properly?")
        sys.exit(1)

    # INSERT THE DATA
    for destination_name in args.destination:
        inserter_class = get_inserter_from_arg(destination_name)
        insert_movie_ratings(inserter_class(args), movies, type(parser.site).__name__)
def get_inserter_from_arg(param):
    """Return the ``INSERTERS`` entry registered under ``param``.

    The lookup uses the upper-cased ``param``. If no entry matches, an error
    and the list of available inserters are printed and the process exits
    with status 1.
    """
    inserter_key = param.upper()
    try:
        return INSERTERS[inserter_key]
    except KeyError:
        command_line.error(f"No inserter matching '{param}' found.")
        sys.stdout.write("Available inserters:\r\n")
        for inserter in INSERTERS:
            sys.stdout.write(f" - {inserter} \n")
        sys.stdout.flush()
        sys.exit(1)
def get_parser_from_arg(param):
    """Return the ``PARSERS`` entry registered under ``param``.

    The lookup uses the upper-cased ``param``. If no entry matches, an error
    and the list of available parsers are printed and the process exits with
    status 1.
    """
    parser_key = param.upper()
    try:
        return PARSERS[parser_key]
    except KeyError:
        command_line.error("No parser matching '{entered_parser}' found.".format(entered_parser=param))
        sys.stdout.write("Available parsers:\r\n")
        for available_parser in PARSERS:
            sys.stdout.write(' - {parser} \n'.format(parser=available_parser))
        sys.stdout.flush()
        sys.exit(1)
def execute_inserting(args, movies, parser):
    """Insert the parsed movies into the requested destination sites.

    With ``args.all_destinations`` set, every key of ``INSERTERS`` is used;
    otherwise the upper-cased entries of ``args.destination`` are used. The
    list is then handed to ``_filter_source_site_from_destinations`` together
    with the source site's name (its return value is ignored, so it
    presumably edits the list in place).

    When destinations remain but ``movies`` is empty, an error is printed and
    the process exits with status 1.
    """
    if args.all_destinations:
        destinations = list(INSERTERS.keys())
    else:
        destinations = [destination.upper() for destination in args.destination]
    _filter_source_site_from_destinations(destinations, parser.site.site_name)

    if not destinations:
        return

    if len(movies) == 0:
        command_line.error(
            "There are no files to be inserted. Did the parser run properly?"
        )
        sys.exit(1)

    # INSERT THE DATA
    for destination_name in destinations:
        inserter_class = get_inserter_from_arg(destination_name)
        insert_movie_ratings(inserter_class(args), movies, type(parser.site).__name__)
def execute_parsing(args, parser):
    """Return the movies, either loaded from a file or parsed from the site.

    Exits with status 1 when the parser's site has no valid credentials.
    When ``args.file`` is set, the movies are loaded from that file and the
    site's browser is killed; otherwise the movies are parsed from the
    source via ``parse_data_from_source``.
    """
    if not parser.site.CREDENTIALS_VALID:
        command_line.error(
            "No valid credentials found for {site_name}. Skipping parsing.".
            format(site_name=parser.site.site_name))
        sys.exit(1)

    if not args.file:
        # PARSE DATA
        return parse_data_from_source(parser)

    # LOAD FROM FILE
    loaded_movies = load_data_from_file(args.file)
    parser.site.browser_handler.kill()
    return loaded_movies
def _retrieve_pages_count_and_movies_count(self, movie_ratings_page):
    """Fetch the rated-movies count from the session API and return the page count.

    A synchronous XMLHttpRequest against the moviepilot session endpoint is
    run inside the browser, and its JSON response is decoded. The value under
    ``movie_ratings`` is stored in ``self.movies_count``.

    If the response has no ``movie_ratings`` key, an error is printed, the
    browser is killed and the process exits with status 1.

    ``movie_ratings_page`` is not used by this method.

    Returns:
        ``self.movies_count`` divided by 100, rounded up.
    """
    fetch_session_script = (
        ' var xmlHttp = new XMLHttpRequest();'
        ' xmlHttp.open( "GET", "https://www.moviepilot.de/api/session", false );'
        ' xmlHttp.send( null );'
        ' return xmlHttp.responseText; '
    )
    raw_session = self.site.browser.execute_script(fetch_session_script)
    session = json.loads(raw_session)

    if 'movie_ratings' not in session:
        command_line.error('Could not establish a session. '
                           'Please try again with the -x option if the problem persists.')
        self.site.browser_handler.kill()
        sys.exit(1)

    self.movies_count = session['movie_ratings']
    return math.ceil(self.movies_count / 100)
def insert_movie_ratings(inserter, movies, source):
    """Run one inserter, reporting problems instead of propagating them.

    When the inserter's site has no valid credentials, a warning is printed
    and nothing is inserted. Any exception raised by ``inserter.insert`` is
    caught: the browser is killed, an error naming the site is printed and
    the traceback is written out.
    """
    if not inserter.site.CREDENTIALS_VALID:
        command_line.warn(
            "No valid credentials found for {site_name}. Skipping insertion.".
            format(site_name=inserter.site.site_name))
        return

    try:
        inserter.insert(movies, source)
    except Exception:  # pylint: disable=broad-except
        # exception should be logged in a file --> issue #15
        sys.stdout.flush()
        inserter.site.browser_handler.kill()
        command_line.error(
            "There was an exception inside {site_name} (see below). Skipping insertion."
            .format(site_name=inserter.site.site_name))
        traceback.print_exc()
def parse_data_from_source(parser):
    """Parse the movies from the parser's site and save them as JSON.

    A ``RatSException`` raised while parsing is reported and the process
    exits with status 1. On success the movies are written to
    ``EXPORTS_FOLDER`` under a name built from ``TIMESTAMP`` and the site's
    class name, a summary line is printed, and the movies are returned.
    """
    try:
        movies = parser.parse()
    except RatSException as e:
        command_line.error(str(e))
        sys.exit(1)

    json_filename = f"{TIMESTAMP}_{type(parser.site).__name__}.json"
    file_impex.save_movies_to_json(movies, folder=EXPORTS_FOLDER, filename=json_filename)

    summary = (
        f"\r\n===== {parser.site.site_displayname}: saved {len(movies)} parsed movies to "
        f"{EXPORTS_FOLDER}/{json_filename}\r\n"
    )
    sys.stdout.write(summary)
    sys.stdout.flush()
    return movies
def _parse_ratings(self):
    """Download the ratings archive, extract its CSV and parse the movies.

    The exports folder is listed before and after the download, and
    ``self._get_downloaded_filename`` is given both listings. Only when it
    yields exactly one entry is that archive extracted, the CSV renamed and
    parsed into ``self.movies``. Otherwise an error is printed and
    ``self.movies`` is left untouched.
    """
    self.before = os.listdir(self.exports_folder)  # pylint: disable=attribute-defined-outside-init
    self._download_ratings_csv()
    after = os.listdir(self.exports_folder)

    change = self._get_downloaded_filename(after, self.before)
    if len(change) != 1:
        command_line.error('Could not determine file location')
        return

    # the one file that was added to the dir
    archive_filename = change.pop()
    archive_path = os.path.join(self.exports_folder, archive_filename)
    file_impex.extract_file_from_archive(archive_path, self.downloaded_file_name, self.exports_folder)
    self._rename_csv_file(self.downloaded_file_name)

    csv_path = os.path.join(self.exports_folder, self.csv_filename)
    self.movies = self._parse_movies_from_csv(csv_path)
def _parse_ratings(self):
    """Download the ratings archive, extract ``ratings.csv`` and parse the movies.

    The exports folder is listed before and after the download, and
    ``self._get_downloaded_filename`` is given both listings. Only when it
    yields exactly one entry is that archive extracted, the CSV renamed and
    parsed into ``self.movies``. Otherwise an error is printed and
    ``self.movies`` is left untouched.
    """
    before = os.listdir(self.exports_folder)
    self._download_ratings_csv()
    after = os.listdir(self.exports_folder)

    change = self._get_downloaded_filename(after, before)
    if len(change) != 1:
        command_line.error('Could not determine file location')
        return

    # the one file that was added to the dir
    archive_filename = change.pop()
    ratings_csv_filename = 'ratings.csv'
    archive_path = os.path.join(self.exports_folder, archive_filename)
    file_impex.extract_file_from_archive(archive_path, ratings_csv_filename, self.exports_folder)
    self._rename_csv_file(ratings_csv_filename)

    csv_path = os.path.join(self.exports_folder, self.csv_filename)
    self.movies = self._parse_movies_from_csv(csv_path)
def execute_inserting(args, movies, parser):
    """Insert the parsed movies into the requested destination sites.

    Returns immediately when neither ``args.all_destinations`` nor
    ``args.destination`` is set. With ``args.all_destinations`` set, every
    key of ``INSERTERS`` is used; otherwise the upper-cased entries of
    ``args.destination`` are used. The list is then handed to
    ``_filter_source_site_from_destinations`` together with the source
    site's name (its return value is ignored, so it presumably edits the
    list in place).

    A ``RatSException`` raised while building or running a single inserter
    is reported through ``command_line.error`` and the remaining
    destinations are still processed.

    Raises:
        NoMoviesForInsertion: if at least one destination remains after
            filtering but ``movies`` is empty.
    """
    if not args.all_destinations and not args.destination:
        return

    if args.all_destinations:
        destinations = list(INSERTERS.keys())
    else:
        destinations = [destination.upper() for destination in args.destination]
    _filter_source_site_from_destinations(destinations, parser.site.site_name)

    if not destinations:
        return

    if len(movies) == 0:
        # Raise rather than merely construct the exception: with nothing to
        # insert there is no point in starting an inserter per destination.
        raise NoMoviesForInsertion(
            "There are no files to be inserted. Did the parser run properly?"
        )

    # INSERT THE DATA
    for destination in destinations:
        try:
            inserter = get_inserter_from_arg(destination)(args)
            insert_movie_ratings(inserter, movies, type(parser.site).__name__)
        except RatSException as e:
            command_line.error(str(e))
def _download_ratings_csv(self):
    """Keep requesting the ratings CSV download until the file has arrived.

    Prints a progress line, sets the browser's page-load timeout to 10 and
    waits one second. It then calls ``self._call_download_url()`` repeatedly
    while ``self._file_was_downloaded()`` is false.

    A ``TimeoutException`` is retried after a pause that grows with the
    attempt number; past the tenth attempt it is re-raised. If a call
    completes without a timeout past the tenth attempt, an error is printed
    and the process exits with status 1.
    """
    progress_message = '\r===== {site_displayname}: Retrieving ratings CSV file'.format(
        site_displayname=self.site.site_displayname)
    sys.stdout.write(progress_message)
    sys.stdout.flush()

    self.site.browser.set_page_load_timeout(10)
    time.sleep(1)

    attempt = 0
    while not self._file_was_downloaded():
        attempt += 1
        try:
            self._call_download_url()
        except TimeoutException as e:
            if attempt > 10:
                raise e
            # Back off a little longer after every timed-out attempt.
            time.sleep(attempt * 1)
        else:
            if attempt > 10:
                command_line.error("The CSV file containing the movies data could not be downloaded.")
                sys.exit(1)
def parse_data_from_source(parser):
    """Parse the movies from the parser's site and save them as JSON.

    A ``RatSException`` raised while parsing is reported and the process
    exits with status 1. On success the movies are written to
    ``EXPORTS_FOLDER`` under a name built from ``TIMESTAMP`` and the site's
    class name, a summary line is printed, and the movies are returned.
    """
    try:
        movies = parser.parse()
    except RatSException as e:
        command_line.error(str(e))
        sys.exit(1)

    json_filename = '{timestamp}_{sitename}.json'.format(
        timestamp=TIMESTAMP,
        sitename=type(parser.site).__name__,
    )
    file_impex.save_movies_to_json(movies, folder=EXPORTS_FOLDER, filename=json_filename)

    summary_template = (
        '\r\n===== {site_displayname}: saved {parsed_movies_count} parsed movies to '
        '{folder}/{filename}\r\n'
    )
    sys.stdout.write(summary_template.format(
        site_displayname=parser.site.site_displayname,
        parsed_movies_count=len(movies),
        folder=EXPORTS_FOLDER,
        filename=json_filename,
    ))
    sys.stdout.flush()
    return movies
def handle_request_blocked_by_website(self):
    """Report a blocked request and kill the browser.

    The request counts as blocked when the text ``stackpath`` occurs in the
    current page source. Nothing happens otherwise.
    """
    if 'stackpath' not in self.browser.page_source:
        return

    command_line.error("The request was blocked by the website.")
    self.browser_handler.kill()
def post_upload_action(self):
    """Report a failed CSV upload and kill the browser.

    The upload counts as failed when the site's import-problem message
    occurs in the current page source. Nothing happens otherwise.
    """
    upload_failed_marker = 'Sorry, a problem occurred while importing your ratings.'
    if upload_failed_marker not in self.site.browser.page_source:
        return

    command_line.error("Couldn't upload CSV.")
    self.site.browser_handler.kill()