Example #1
0
class Preprocessor:
    """Post-scan page pipeline: crop to DIN-A4 ratio, rotate backsides,
    trim borders, drop blank pages, and export each page as JPG.
    """

    def __init__(self, config):
        self.config = config
        self.exporter = Exporter(config)

    def process(self, pages):
        """Run the full preprocessing pipeline on every page, with a progress bar."""
        for page in tqdm(pages,
                         file=sys.stdout,
                         desc='Preprocess all pages...'):
            self._process_image(page)

    def _process_image(self, page):
        """Apply the configured preprocessing steps to a single page."""
        page.load_image()
        if not self.config.skip_length_trimming:
            self._crop_height_dina4_ratio(page)
        # Backsides come out upside down in (manual) duplex mode.
        if (self.config.duplex or self.config.manual_duplex) \
                and page.is_backside \
                and not self.config.skip_backside_rotation:
            self._rotate_page(page)
        if not self.config.skip_trim_pages:
            self._trim_image(page)
        if not self.config.skip_empty_page_removal:
            self._remove_page_if_empty(page)
        self.exporter.save_as_jpg(page)
        page.unload_image()

    def _remove_page_if_empty(self, page):
        """Mark the page as removed when its pixel variation falls below
        the configured emptiness threshold."""
        # Standard deviation as a percentage of the quantum range: a
        # near-blank page shows almost no pixel variation.  (The old
        # name `empty_threshold` was misleading -- this is the measured
        # value, not the threshold.)
        variation = 100 * page.image.standard_deviation / page.image.quantum_range
        if variation < self.config.empty_threshold:
            page.removed = True
            print(
                f'remove blank page: {page.filename} because {variation} < {self.config.empty_threshold}'
            )

    @staticmethod
    def _crop_height_dina4_ratio(page):
        """Crop the page height so the image matches the DIN A4 aspect ratio."""
        ratio = 210.0 / 297.0  # DIN A4: 210mm x 297mm
        height = int(page.image.width / ratio)
        # Only crop when the scan is longer than an A4 page would be.
        if height < page.image.height:
            page.image.crop(0, 0, width=page.image.width, height=height)

    @staticmethod
    def _rotate_page(page):
        """Rotate the page image by 180 degrees (upside-down backsides)."""
        page.image.rotate(180)

    @staticmethod
    def _trim_image(page):
        """Trim near-white borders, allowing 20% fuzz around pure white."""
        page.image.trim(fuzz=0.2 * page.image.quantum_range, color='white')
Example #2
0
def exporter(celery_config):
    """Return an Exporter wired to the shared broker URL on a fixed port."""
    config = {
        "broker_url": celery_config["broker_url"],
        "port": 17000,
    }
    return Exporter(config)
Example #3
0
def exporter(find_free_port, celery_config):
    """Fixture: yield an Exporter carrying a full test configuration.

    The configuration is attached as the ``cfg`` attribute rather than
    passed to the constructor, mirroring how the exporter under test
    reads its settings.
    """
    cfg = {
        "port": find_free_port(),
        "broker_url": celery_config["broker_url"],
        "broker_transport_option": ["visibility_timeout=7200"],
        "broker_ssl_option": [],
        "retry_interval": 5,
        "log_level": "DEBUG",
    }
    instance = Exporter()
    # Plain attribute assignment is clearer than setattr with a constant
    # name (ruff B010); also avoids shadowing the fixture's own name.
    instance.cfg = cfg
    yield instance
Example #4
0
def main():
    """Scrape tweets for each query in queries.txt and export the results."""
    # Set up collaborators
    twitter = TwitterSearch()
    queries = FileParser("./queries.txt").queries
    exporter = Exporter('results')
    amount = 250

    # Time the search phase only
    tic = time.perf_counter()
    twitter.search(queries, amount)
    results = twitter.results
    toc = time.perf_counter()

    print(
        f'Scraped {len(queries) * amount} tweets in: {toc - tic:0.2f} seconds')

    # Persist what was collected
    exporter.export(results)
Example #5
0
def main(config):
    """Drive the scan -> preprocess -> export workflow."""
    scan_device = Scanner(config)
    pipeline = Preprocessor(config)
    helpers = Utils(config)

    # Front sides first; manual duplex needs the user to flip the stack.
    scan_device.scan(front=True)
    if config.manual_duplex:
        print('rotate pages and press enter')
        input()
        scan_device.scan(front=False)

    pages = scan_device.get_pages()
    pipeline.process(pages)

    pdf_exporter = Exporter(config)
    pdf_exporter.save_as_pdf(pages)
    if helpers.show_preview():
        pdf_exporter.upload_doc()
    helpers.clean_up(pages)
Example #6
0
def exporter(find_free_port, celery_config):
    """Fixture: yield an Exporter with a minimal test configuration attached."""
    cfg = {"port": find_free_port(), "broker_url": celery_config["broker_url"]}
    instance = Exporter()
    # Plain attribute assignment is clearer than setattr with a constant
    # name (ruff B010); also avoids shadowing the fixture's own name.
    instance.cfg = cfg
    yield instance
Example #7
0
import os
import json
from src.exporter import Exporter
from src.helper import current_milli_time, slack_ping

if __name__ == "__main__":

    try:
        start = current_milli_time()  # Start time

        # Start Automation Process
        exporter = Exporter()
        exporter.create_result_dir()
        exporter.run_test()

        # Push to gateway report
        total, success_apis, failed_apis = exporter.push_report()

        # Clean result cache
        exporter.clean_results()

        end = current_milli_time()  # End time

        time_taken = end - start

        # Ping slack
        slack_ping(total, len(success_apis), len(failed_apis), time_taken)

        if len(failed_apis) > 0:
            print("Failed:\n")
            for err in failed_apis:
Example #8
0
 def __init__(self, config):
     """Store the shared config and create the Exporter used for output.

     :param config: application configuration object passed through to Exporter
     """
     self.config = config
     self.exporter = Exporter(config)
Example #9
0
def solution():
    """Streamlit page: upload a dataset by URL, bundle it with the
    pre-trained StatisticalWaiterNotificationBundler, review the result,
    and offer the initial/bundled notifications for download.
    """
    # F541 fix: no placeholder, so a plain string literal suffices.
    st.title('Solution tool')

    st.write('''
    Page to upload and process an unseen dataset.


    Processing is done by the **StatisticalWaiterBundler** trained on all the initial dataset from challenge (`notifications.csv`).


    Results can be exported but are limited to first 5k rows due to technical issues. Processing is still done on all notifications.


    A performance review is also available at the bottom.
    ''')

    st.info('Delay predictor is trained on notifications.csv (full dataset).')

    st.markdown('**Upload dataset from url**')
    url_path = st.text_input('Dataset URL')

    # Only run the pipeline once the user provided a URL.
    if url_path is not None and url_path != '':

        st.title('CLI')
        st.write('Compute offline using the CLI interface :')
        st.write(
            f"```\npython cli.py --input-file='{url_path}' --output-file='output.csv' --review\n```"
        )

        st.title('Process')
        with st.spinner(f'Load data from {url_path}'):
            t0 = time.time()
            streamer = NotificationStreamer(url_path)
            t1 = time.time()
            st.success(f'Load data finished in {round(t1-t0, 2)}s')

        with st.spinner('Processing bundler...'):
            t0 = time.time()
            # Bundler is trained on the full reference dataset, not the upload.
            bundler = StatisticalWaiterNotificationBundler(DATASETS_DIRPATH /
                                                           'notifications.csv')
            bundled_notifications = bundler.export(streamer.stream())
            t1 = time.time()
            st.success(f'Bundler finished in {round(t1-t0, 2)}s')

        with st.spinner('Processing reviewer...'):
            t0 = time.time()
            reviewer = Reviewer()
            review = reviewer.review(streamer.notifications,
                                     bundled_notifications)
            t1 = time.time()
            st.success(f'Reviewer finished in {round(t1-t0, 2)}s')

        st.title('Exports')
        initial_notifications = Exporter(streamer.notifications).format()
        write_df('Initial notifications', initial_notifications)
        file_download(initial_notifications, 'initial_notifications')

        bundled_notifications = Exporter(bundled_notifications).format()
        write_df('Bundled notifications', bundled_notifications)
        file_download(bundled_notifications, 'bundled_notifications')

        st.title('Performances')
        reviewer.display_review()
def interactive_tests():
    """Streamlit page: pick a dataset and a bundler algorithm, run the
    bundling + review pipeline interactively, and export the results.
    """
    st.title('Parameters of the test')

    st.markdown('**Choose local csv...**')
    path = st.selectbox('Dataset file', DATASETS_PATHS)

    st.markdown('**...or upload from url**')
    url_path = st.text_input('Dataset URL')
    # A non-empty URL overrides the local-file selection.
    if url_path is not None and url_path != '':
        st.warning('Tests from URL are not efficient since the file is downloaded multiple times.')
        path = url_path

    limit = st.number_input(
        'Number of notifications to load (-1 to load all dataset)',
        min_value=-1,
        max_value=None,
        value=10000,
        step=10000,
    )

    bundler_class = st.selectbox('Algorithm', options=[
        WaiterNotificationBundler,
        NaiveNotificationBundler,
        CheaterNotificationBundler,
        StatisticalWaiterNotificationBundler,
    ], format_func=lambda x: x.__name__)

    # Per-algorithm constructor arguments collected from the UI.
    if bundler_class is WaiterNotificationBundler:
        default_waiting_time_in_hour = st.number_input(
            'Default waiting time (in hour)',
            min_value=0.,
            max_value=24.,
            value=1.,
            step=0.25,
        )
        bundler_kwargs = {'default_waiting_time_in_hour': default_waiting_time_in_hour}
    elif bundler_class is StatisticalWaiterNotificationBundler:
        train_filepath = STATISTICS_DIRPATH / st.selectbox(
            'Dataset used to train DelayPredictor', options=[
                'august_train_dataset.csv',
                'notifications.csv'
            ],
        )
        bundler_kwargs = {'train_filepath': train_filepath}
    else:
        bundler_kwargs = {}

    streamer = NotificationStreamer(path, limit=limit)
    bundler = bundler_class(**bundler_kwargs)
    reviewer = Reviewer()

    st.title(f'{bundler.__class__.__name__}')

    st.write(f' - Data from `{path}`')
    st.write(f' - {len(streamer.notifications)} notifications')

    # F541 fix: no placeholder, so a plain string literal suffices.
    st.title('Processing')

    with st.spinner('Processing bundler...'):
        t0 = time.time()
        bundled_notifications = bundler.export(streamer.stream())
        t1 = time.time()
        st.success(f'Bundler finished in {round(t1-t0, 2)}s')

    with st.spinner('Processing reviewer...'):
        t0 = time.time()
        review = reviewer.review(streamer.notifications, bundled_notifications)
        t1 = time.time()
        st.success(f'Reviewer finished in {round(t1-t0, 2)}s')

    reviewer.display_review()

    st.title('Exports')
    initial_notifications = Exporter(streamer.notifications).format()
    write_df('Initial notifications', initial_notifications)
    file_download(initial_notifications, 'initial_notifications')

    bundled_notifications = Exporter(bundled_notifications).format()
    write_df('Bundled notifications', bundled_notifications)
    file_download(bundled_notifications, 'bundled_notifications')
Example #11
0
                    default=False,
                    action='store_true',
                    help='Print review to stdout if True.')
args = parser.parse_args()

# Time the whole CLI run end to end.
t0 = time.time()

print(f'Load notifications from {args.input_file}.')
streamer = NotificationStreamer(args.input_file)
print(f'{len(streamer.notifications)} notifications found.')

print('Load StatisticalWaiterNotificationBundler.')
# The bundler is trained on the full reference dataset, not the input file.
bundler = StatisticalWaiterNotificationBundler(DATASETS_DIRPATH /
                                               'notifications.csv')

print('Process bundler.')
bundled_notifications = bundler.export(streamer.stream())

print(f'Save bundled notifications to {args.output_file}.')
Exporter(bundled_notifications).save_to(args.output_file)

# Optionally echo the formatted result to stdout.
if args.stdout:
    print(Exporter(bundled_notifications).format())

# Optionally score the bundling against the original notifications.
if args.review:
    print('Process reviewer.')
    pprint(Reviewer().review(streamer.notifications, bundled_notifications))

t1 = time.time()
print(f'Done. Took {round(t1-t0, 1)}s.')