class Preprocessor:
    """Runs the per-page image pipeline (crop, rotate, trim, blank-page
    detection) and hands every processed page to the Exporter."""

    def __init__(self, config):
        """Keep the runtime configuration and create the JPG exporter."""
        self.config = config
        self.exporter = Exporter(config)

    def process(self, pages):
        """Process every scanned page, showing a progress bar on stdout."""
        progress = tqdm(pages, file=sys.stdout, desc='Preprocess all pages...')
        for page in progress:
            self._process_image(page)

    def _process_image(self, page):
        """Apply all configured transformations to one page, save it as JPG,
        then release the image memory again."""
        page.load_image()
        if not self.config.skip_length_trimming:
            self._crop_height_dina4_ratio(page)
        duplex_mode = self.config.duplex or self.config.manual_duplex
        if duplex_mode and page.is_backside and not self.config.skip_backside_rotation:
            # Back sides arrive upside down, so flip them.
            self._rotate_page(page)
        if not self.config.skip_trim_pages:
            self._trim_image(page)
        if not self.config.skip_empty_page_removal:
            self._remove_page_if_empty(page)
        self.exporter.save_as_jpg(page)
        page.unload_image()

    def _remove_page_if_empty(self, page):
        """Flag the page as removed when its pixel variation falls below the
        configured emptiness threshold."""
        # Standard deviation normalised to percent of the quantum range.
        empty_threshold = 100 * page.image.standard_deviation / page.image.quantum_range
        if empty_threshold < self.config.empty_threshold:
            page.removed = True
            print(
                f'remove blank page: {page.filename} because {empty_threshold} < {self.config.empty_threshold}'
            )

    @staticmethod
    def _crop_height_dina4_ratio(page):
        """Cut the page down to DIN A4 proportions when it is too long."""
        din_a4_ratio = 210.0 / 297.0
        target_height = int(page.image.width / din_a4_ratio)
        if target_height < page.image.height:
            page.image.crop(0, 0, width=page.image.width, height=target_height)

    @staticmethod
    def _rotate_page(page):
        """Turn the page image upside down."""
        page.image.rotate(180)

    @staticmethod
    def _trim_image(page):
        """Trim white borders, tolerating 20% fuzz of the quantum range."""
        page.image.trim(fuzz=0.2 * page.image.quantum_range, color='white')
def exporter(celery_config):
    """Build an Exporter on fixed port 17000, reusing the broker URL from the
    supplied celery configuration."""
    config = {
        "broker_url": celery_config["broker_url"],
        "port": 17000,
    }
    return Exporter(config)
def exporter(find_free_port, celery_config):
    """Generator fixture: yield an Exporter carrying a verbose test config.

    The config combines a dynamically allocated free port, the broker URL
    from the celery fixture, transport/SSL options and debug-level logging.
    """
    # Build the config first so find_free_port() runs before Exporter().
    exporter_config = {
        "port": find_free_port(),
        "broker_url": celery_config["broker_url"],
        "broker_transport_option": ["visibility_timeout=7200"],
        "broker_ssl_option": [],
        "retry_interval": 5,
        "log_level": "DEBUG",
    }
    instance = Exporter()
    # Plain attribute assignment; callers read it back as `exporter.cfg`.
    instance.cfg = exporter_config
    yield instance
def main():
    """Scrape tweets for every query in ./queries.txt, time the search and
    export the collected results."""
    # Instantiate collaborators: the scraper, the query list and the exporter.
    twitter = TwitterSearch()
    queries = FileParser("./queries.txt").queries
    exporter = Exporter('results')
    amount = 250  # tweets requested per query

    # Start time
    tic = time.perf_counter()
    # Search
    twitter.search(queries, amount)
    results = twitter.results
    # End time
    toc = time.perf_counter()
    print(
        f'Scraped {len(queries) * amount} tweets in: {toc - tic:0.2f} seconds')

    # Export results
    exporter.export(results)
def main(config):
    """Drive the full scan pipeline: scan, preprocess, export PDF, clean up."""
    scanner = Scanner(config)
    preprocessor = Preprocessor(config)
    utils = Utils(config)

    # Scan front sides first; in manual duplex mode the user flips the stack
    # and the back sides are scanned in a second pass.
    scanner.scan(front=True)
    if config.manual_duplex:
        print('rotate pages and press enter')
        input()
        scanner.scan(front=False)

    pages = scanner.get_pages()
    preprocessor.process(pages)

    exporter = Exporter(config)
    exporter.save_as_pdf(pages)
    # Upload only when the preview is approved by the user.
    if utils.show_preview():
        exporter.upload_doc()
    utils.clean_up(pages)
def exporter(find_free_port, celery_config):
    """Yield an Exporter whose ``cfg`` attribute holds only a free port and
    the broker URL from the celery fixture."""
    # Allocate the port before constructing the Exporter, as before.
    minimal_cfg = {
        "port": find_free_port(),
        "broker_url": celery_config["broker_url"],
    }
    instance = Exporter()
    instance.cfg = minimal_cfg
    yield instance
import os import json from src.exporter import Exporter from src.helper import current_milli_time, slack_ping if __name__ == "__main__": try: start = current_milli_time() # Start time # Start Automation Process exporter = Exporter() exporter.create_result_dir() exporter.run_test() # Push to gateway report total, success_apis, failed_apis = exporter.push_report() # Clean result cache exporter.clean_results() end = current_milli_time() # End time time_taken = end - start # Ping slack slack_ping(total, len(success_apis), len(failed_apis), time_taken) if len(failed_apis) > 0: print("Failed:\n") for err in failed_apis:
def __init__(self, config):
    """Keep the runtime configuration and create the Exporter used for output."""
    self.config = config
    self.exporter = Exporter(self.config)
def solution():
    """Streamlit page: load an unseen dataset from a URL, bundle its
    notifications with the statistical bundler, review the result and offer
    exports (display limited to the first rows, per the page text)."""
    st.title(f'Solution tool')
    st.write('''
        Page to upload and process an unseen dataset.
        Processing is done by the **StatisticalWaiterBundler** trained on all
        the initial dataset from challenge (`notifications.csv`).
        Results can be exported but are limited to first 5k rows due to
        technical issues. Processing is still done on all notifications.
        A performance review is also available at the bottom.
    ''')
    st.info('Delay predictor is trained on notifications.csv (full dataset).')
    st.markdown('**Upload dataset from url**')
    url_path = st.text_input('Dataset URL')
    # Everything below only renders once the user has entered a URL.
    if url_path is not None and url_path != '':
        # Show the equivalent offline CLI invocation for this URL.
        st.title('CLI')
        st.write('Compute offline using the CLI interface :')
        st.write(
            f"```\npython cli.py --input-file='{url_path}' --output-file='output.csv' --review\n```"
        )
        st.title('Process')
        with st.spinner(f'Load data from {url_path}'):
            t0 = time.time()
            streamer = NotificationStreamer(url_path)
            t1 = time.time()
            st.success(f'Load data finished in {round(t1-t0, 2)}s')
        with st.spinner('Processing bundler...'):
            t0 = time.time()
            # Bundler is always trained on the full challenge dataset.
            bundler = StatisticalWaiterNotificationBundler(DATASETS_DIRPATH / 'notifications.csv')
            bundled_notifications = bundler.export(streamer.stream())
            t1 = time.time()
            st.success(f'Bundler finished in {round(t1-t0, 2)}s')
        with st.spinner('Processing reviewer...'):
            t0 = time.time()
            reviewer = Reviewer()
            review = reviewer.review(streamer.notifications, bundled_notifications)
            t1 = time.time()
            st.success(f'Reviewer finished in {round(t1-t0, 2)}s')
        # Export section: formatted tables + download links for both datasets.
        st.title('Exports')
        initial_notifications = Exporter(streamer.notifications).format()
        write_df('Initial notifications', initial_notifications)
        file_download(initial_notifications, 'initial_notifications')
        bundled_notifications = Exporter(bundled_notifications).format()
        write_df('Bundled notifications', bundled_notifications)
        file_download(bundled_notifications, 'bundled_notifications')
        st.title('Performances')
        reviewer.display_review()
def interactive_tests():
    """Streamlit page: pick a dataset (local file or URL), choose a bundler
    algorithm and its parameters, run it and review the result."""
    st.title('Parameters of the test')
    st.markdown('**Choose local csv...**')
    path = st.selectbox('Dataset file', DATASETS_PATHS)
    st.markdown('**...or upload from url**')
    url_path = st.text_input('Dataset URL')
    # A non-empty URL overrides the local file selection.
    if url_path is not None and url_path != '':
        st.warning('Tests from URL are not efficient since the file is downloaded multiple times.')
        path = url_path
    limit = st.number_input(
        'Number of notifications to load (-1 to load all dataset)',
        min_value=-1,
        max_value=None,
        value=10000,
        step=10000,
    )
    bundler_class = st.selectbox('Algorithm', options=[
        WaiterNotificationBundler,
        NaiveNotificationBundler,
        CheaterNotificationBundler,
        StatisticalWaiterNotificationBundler,
    ], format_func=lambda x: x.__name__)
    # Algorithm-specific parameters; other bundlers take no kwargs.
    if bundler_class is WaiterNotificationBundler:
        default_waiting_time_in_hour = st.number_input(
            'Default waiting time (in hour)',
            min_value=0.,
            max_value=24.,
            value=1.,
            step=0.25,
        )
        bundler_kwargs = {'default_waiting_time_in_hour': default_waiting_time_in_hour}
    elif bundler_class is StatisticalWaiterNotificationBundler:
        train_filepath = STATISTICS_DIRPATH / st.selectbox(
            'Dataset used to train DelayPredictor',
            options=[
                'august_train_dataset.csv',
                'notifications.csv'
            ],
        )
        bundler_kwargs = {'train_filepath': train_filepath}
    else:
        bundler_kwargs = {}
    streamer = NotificationStreamer(path, limit=limit)
    bundler = bundler_class(**bundler_kwargs)
    reviewer = Reviewer()
    st.title(f'{bundler.__class__.__name__}')
    st.write(f' - Data from `{path}`')
    st.write(f' - {len(streamer.notifications)} notifications')
    st.title(f'Processing')
    with st.spinner('Processing bundler...'):
        t0 = time.time()
        bundled_notifications = bundler.export(streamer.stream())
        t1 = time.time()
        st.success(f'Bundler finished in {round(t1-t0, 2)}s')
    with st.spinner('Processing reviewer...'):
        t0 = time.time()
        review = reviewer.review(streamer.notifications, bundled_notifications)
        t1 = time.time()
        st.success(f'Reviewer finished in {round(t1-t0, 2)}s')
    reviewer.display_review()
# NOTE(review): this looks like the tail of a Streamlit page function —
# `streamer` and `bundled_notifications` must be defined upstream; confirm
# the enclosing scope before reusing this fragment.
# Export section: formatted tables plus download links for both the raw and
# the bundled notifications.
st.title('Exports')
initial_notifications = Exporter(streamer.notifications).format()
write_df('Initial notifications', initial_notifications)
file_download(initial_notifications, 'initial_notifications')
bundled_notifications = Exporter(bundled_notifications).format()
write_df('Bundled notifications', bundled_notifications)
file_download(bundled_notifications, 'bundled_notifications')
    default=False,
    action='store_true',
    help='Print review to stdout if True.')
# NOTE(review): the lines above are the tail of a parser.add_argument(...)
# call whose start lies outside this view.
args = parser.parse_args()
t0 = time.time()
print(f'Load notifications from {args.input_file}.')
streamer = NotificationStreamer(args.input_file)
print(f'{len(streamer.notifications)} notifications found.')
print('Load StatisticalWaiterNotificationBundler.')
# The bundler is trained on the full challenge dataset.
bundler = StatisticalWaiterNotificationBundler(DATASETS_DIRPATH / 'notifications.csv')
print('Process bundler.')
bundled_notifications = bundler.export(streamer.stream())
print(f'Save bundled notifications to {args.output_file}.')
Exporter(bundled_notifications).save_to(args.output_file)
# Optionally echo the formatted result and/or a review to stdout.
if args.stdout:
    print(Exporter(bundled_notifications).format())
if args.review:
    print('Process reviewer.')
    pprint(Reviewer().review(streamer.notifications, bundled_notifications))
t1 = time.time()
print(f'Done. Took {round(t1-t0, 1)}s.')