Example #1
def scrap_all_comments(base_url, urls, max_workers=256):

    urls_to_do = [url for (_, url, is_visited) in urls if not is_visited]
    urls_to_do_iterator = iter(urls_to_do)

    pbar = tqdm(initial=len(urls) - len(urls_to_do), total=len(urls))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:

        futures = {}
        for url in itertools.islice(urls_to_do_iterator, max_workers):
            futures_executor = executor.submit(scrap_comments, url=url)
            futures.update({futures_executor: url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                url = futures[future]
                futures.pop(future)
                try:
                    comments = future.result()
                except Exception as exc:
                    tqdm.write(f"{url} generated an exception: {exc}")
                else:
                    with DimnaDatabase(db_path, logger) as db:
                        db.update_page_visit_status(
                            base_url, url, True,
                        )
                        if comments:
                            db.insert_all_rating(base_url, comments)
            for url in itertools.islice(urls_to_do_iterator, len(done)):
                futures_executor = executor.submit(scrap_comments, url=url)
                futures.update({futures_executor: url})
    pbar.close()
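Example #1 (like Examples #2, #4, #8 and #11 below) uses a sliding-window submission pattern: seed the executor with at most max_workers futures, wait for the first completion, process the finished futures, then refill from the iterator with itertools.islice so that no more than max_workers requests are ever in flight. A minimal, self-contained sketch of that pattern, with a hypothetical fetch() standing in for scrap_comments():

import concurrent.futures
import itertools

def fetch(url):
    # placeholder for the real work, e.g. an HTTP request plus parsing
    return len(url)

def run_bounded(urls, max_workers=8):
    results = {}
    url_iterator = iter(urls)
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Seed the pool with at most max_workers tasks.
        futures = {
            executor.submit(fetch, url): url
            for url in itertools.islice(url_iterator, max_workers)
        }
        while futures:
            # Block until at least one in-flight task finishes.
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                url = futures.pop(future)
                try:
                    results[url] = future.result()
                except Exception as exc:
                    print(f"{url} generated an exception: {exc}")
            # Top the window back up: one new submission per completed task.
            for url in itertools.islice(url_iterator, len(done)):
                futures[executor.submit(fetch, url)] = url
    return results

print(run_bounded([f"https://example.com/page/{i}" for i in range(20)]))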
Example #2
def get_all_comments(restaurants_url, pages_tracker=None, max_workers=64):  # None instead of a mutable {} default

    restaurants_url_to_do_iterator = iter(restaurants_url)
    pages_comments = []
    pbar = tqdm(total=len(restaurants_url))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:

        futures = {}
        for restaurant_url, page_number in itertools.islice(
            restaurants_url_to_do_iterator, max_workers
        ):

            futures_executor = executor.submit(
                get_page_comments, restaurant_url, page_number
            )
            futures.update({futures_executor: restaurant_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                restaurant_url = futures[future]
                futures.pop(future)

                try:
                    comments = future.result()
                except Exception as exc:
                    tqdm.write(f"{restaurant_url} generated an exception: {exc}")
                else:

                    if pages_tracker:
                        pages_tracker[restaurant_url][1] += 1
                        if (
                            pages_tracker[restaurant_url][1]
                            >= pages_tracker[restaurant_url][0]
                        ):
                            with DimnaDatabase(db_path, logger) as db:
                                db.update_page_visit_status(
                                    base_url, restaurant_url, True,
                                )

                    pages_comments.append(comments)

                    with DimnaDatabase(db_path, logger) as db:
                        for comment, rating in comments["comments"]:
                            db.insert_rating(
                                base_url, comment.replace("\x00", ""), rating
                            )

            for restaurant_url, page_number in itertools.islice(
                restaurants_url_to_do_iterator, len(done)
            ):
                futures_executor = executor.submit(
                    get_page_comments, restaurant_url, page_number
                )
                futures.update({futures_executor: restaurant_url})

    pbar.close()
    return pages_comments
Example #3
def callback(f):
    try:
        dataHash[data] = f.result()
        futures.pop(data)
    except:  # noqa
        sys.stderr.write("Invoke error for {} for {}\n".format(
            f.exception(), data))
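This callback is an excerpt: dataHash, futures and data come from the enclosing scope, which submits one future per data item and registers the callback with add_done_callback. A self-contained sketch of that pattern (square() and items are hypothetical; the default argument binds the current data to each callback):

import concurrent.futures
import sys

def square(x):
    return x * x

dataHash = {}
futures = {}
items = [1, 2, 3, 4]

with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    for data in items:
        def callback(f, data=data):
            try:
                dataHash[data] = f.result()
                futures.pop(data)
            except Exception:
                sys.stderr.write("Invoke error for {} for {}\n".format(
                    f.exception(), data))
        future = executor.submit(square, data)
        futures[data] = future
        future.add_done_callback(callback)

print(dataHash)  # {1: 1, 2: 4, 3: 9, 4: 16}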
Example #4
def find_all_doctors_url(base_url, cities_url, max_workers=128):
    cities_url_iterator = iter(cities_url)
    pbar = tqdm(total=len(cities_url))
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=max_workers) as executor:

        futures = {}
        for city_url in itertools.islice(cities_url_iterator, max_workers):
            futures_executor = executor.submit(find_doctors_url,
                                               base_url=base_url,
                                               city_url=city_url)
            futures.update({futures_executor: city_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED)
            for future in done:
                pbar.update(1)
                city_url = futures[future]
                futures.pop(future)
                try:
                    doctors_url = future.result()
                except Exception as exc:
                    tqdm.write(f"{city_url} generated an exception: {exc}")
                else:
                    with DimnaDatabase(db_path, logger) as db:
                        db.insert_all_pages_url(base_url, doctors_url)
            for city_url in itertools.islice(cities_url_iterator, len(done)):
                futures_executor = executor.submit(find_doctors_url,
                                                   base_url=base_url,
                                                   city_url=city_url)
                futures.update({futures_executor: city_url})
    pbar.close()
Example #5
def test_plain_future():
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    futures = FutureCollection()
    future = executor.submit(fib, 33)
    futures.add('fibonacci', future)
    assert futures.done('fibonacci') is False
    assert futures._state('fibonacci') is not None
    assert future in futures
    futures.pop('fibonacci')
    assert future not in futures
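fib and FutureCollection are provided by the package under test; FutureCollection behaves as a keyed container around ordinary concurrent.futures.Future objects. For reference, the plain stdlib calls the test ultimately exercises look like this (fib here is a hypothetical naive implementation):

import concurrent.futures

def fib(n):
    return n if n < 2 else fib(n - 1) + fib(n - 2)

executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
future = executor.submit(fib, 33)
print(future.done())    # usually False right after submission
print(future.result())  # blocks until fib(33) = 3524578 is computed
executor.shutdown()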
Example #6
def process_urls_parallel(analysis_urls, script_file, container_timeout,
                          max_containers):
    futures = {}
    processed_url_ids = []
    urls = analysis_urls.copy()

    with concurrent.futures.ThreadPoolExecutor(
            max_workers=max_containers) as executor:
        while len(urls) > 0:
            ## Submit jobs to container ##
            for i in range(min(len(urls), max_containers)):
                id = next(iter(urls))  # dict views are not indexable in Python 3
                url = urls.pop(id)
                futures[executor.submit(initiate_container, url, str(id),
                                        script_file, 0,
                                        container_timeout)] = str(id)
            res_futures = concurrent.futures.wait(
                futures,
                timeout=container_timeout,
                return_when=concurrent.futures.ALL_COMPLETED)

            for future in res_futures[0]:
                id = futures.pop(future)
                res = -1
                try:
                    res = future.result(timeout=container_timeout)
                except Exception as exc:
                    print(get_time() + 'Container_' + str(id) + ': Exception ')
                    print(exc)

                res = export_log(id)
                if res > 0:
                    print(get_time() + 'Container_' + str(id) +
                          ': URL Visited successfully!!')
                    api_requests.update_url_api(id, 'is_visited', 'true')
                    api_requests.update_url_api(id, 'visit_status', '1')
                    processed_url_ids.append(id)
                elif res == -99:
                    print(get_time() + 'Container_' + str(id) +
                          ': Chromium Crashed!!')
                    api_requests.update_url_api(id, 'visit_status', '3')
                else:
                    print(get_time() + 'Container_' + str(id) +
                          ': URL Visit failed!!')
                    api_requests.update_url_api(id, 'visit_status', '2')

            for future in res_futures[1]:
                id = futures.pop(future)
                print(get_time() + 'Container_' + str(id) +
                      ': Timeout occurred!!')
                stop_container(id)
                export_log(id)
                api_requests.update_url_api(id, 'is_visited', 'false')

    return processed_url_ids
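concurrent.futures.wait returns a named two-tuple of sets, (done, not_done), so res_futures[0] above holds the futures that finished within container_timeout and res_futures[1] holds the ones still running when the timeout expired. A tiny illustration of that return value:

import concurrent.futures
import time

with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    fast = executor.submit(time.sleep, 0.1)
    slow = executor.submit(time.sleep, 2)
    res = concurrent.futures.wait(
        [fast, slow], timeout=0.5, return_when=concurrent.futures.ALL_COMPLETED
    )
    # res[0] is res.done, res[1] is res.not_done
    print(fast in res.done, slow in res.not_done)  # True True
    # leaving the with-block still waits for the slow future to finish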
Example #7
def simpleParallelZstdReading(filename):
    parallelization = os.cpu_count()
    with concurrent.futures.ThreadPoolExecutor(parallelization) as pool:
        futures = []
        with indexed_zstd.IndexedZstdFile(filename) as file:
            offsets = np.array(list(file.block_offsets().values()))
        sizes = offsets[1:] - offsets[:-1]
        t0 = time.time()
        for offset, size in zip(offsets[:-1], sizes):
            futures.append(pool.submit(readBlock, filename, offset, size))
            while len(futures) >= parallelization:
                futures.pop(0).result()
        t1 = time.time()
        print(f"Reading in parallel with a thread pool took {t1-t0:.3f}s")
Example #8
def find_all_comments_pages(pages_url, max_workers=128):
    book_url_to_do = [
        book_url for (_, book_url, is_visited) in pages_url if not is_visited
    ]
    book_url_to_do_iterator = iter(book_url_to_do)
    pbar = tqdm(initial=len(pages_url) - len(book_url_to_do), total=len(pages_url))
    comments_url = list()
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        for book_url in itertools.islice(book_url_to_do_iterator, max_workers):
            book_id, book_name = book_url.split("/")[-2:]
            first_comment_url = f"{comments_base_url}/{book_id}/{book_name}.json"
            futures_executor = executor.submit(
                find_number_of_comments, comment_url=first_comment_url
            )
            futures.update({futures_executor: book_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                book_url = futures[future]
                futures.pop(future)
                book_id, book_name = book_url.split("/")[-2:]
                try:
                    num_pages = future.result()
                except Exception as exc:
                    tqdm.write(f"{book_url} generated an exception: {exc}")
                else:
                    if num_pages:
                        for page in range(1, num_pages + 1):
                            comment_url = f"{comments_base_url}/{book_id}/{book_name}.json?p={page}"
                            comments_url.append([book_url, comment_url])
                    else:
                        with DimnaDatabase(db_path, logger) as db:
                            db.update_page_visit_status(
                                base_url, book_url, True,
                            )
            for book_url in itertools.islice(book_url_to_do_iterator, len(done)):
                book_id, book_name = book_url.split("/")[-2:]
                first_comment_url = f"{comments_base_url}/{book_id}/{book_name}.json"
                futures_executor = executor.submit(
                    find_number_of_comments, comment_url=first_comment_url
                )
                futures.update({futures_executor: book_url})
    pbar.close()
    return comments_url
Example #9
def dask_executor(items, function, accumulator, **kwargs):
    """Execute using dask futures

    Parameters
    ----------
        items : list
            List of input arguments
        function : callable
            A function to be called on each input, which returns an accumulator instance
        accumulator : AccumulatorABC
            An accumulator to collect the output of the function
        client : distributed.client.Client
            A dask distributed client instance
        treereduction : int, optional
            Tree reduction factor for output accumulators (default: 20)
        status : bool, optional
            If true (default), enable progress bar
        compression : int, optional
            Compress accumulator outputs in flight with LZ4, at level specified (default 1)
            Set to ``None`` for no compression.
        priority : int, optional
            Task priority, default 0
        heavy_input : serializable, optional
            Any value placed here will be broadcast to workers and joined to input
            items in a tuple (item, heavy_input) that is passed to function.
        function_name : str, optional
            Name of the function being passed
    """
    if len(items) == 0:
        return accumulator
    client = kwargs.pop('client')
    ntree = kwargs.pop('treereduction', 20)
    status = kwargs.pop('status', True)
    clevel = kwargs.pop('compression', 1)
    priority = kwargs.pop('priority', 0)
    heavy_input = kwargs.pop('heavy_input', None)
    function_name = kwargs.pop('function_name', None)
    reducer = _reduce()
    if clevel is not None:
        function = _compression_wrapper(clevel, function, name=function_name)
        reducer = _compression_wrapper(clevel, reducer)

    if heavy_input is not None:
        heavy_token = client.scatter(heavy_input, broadcast=True, hash=False)
        items = list(zip(items, repeat(heavy_token)))
    futures = client.map(function, items, priority=priority)
    while len(futures) > 1:
        futures = client.map(
            reducer,
            [futures[i:i + ntree] for i in range(0, len(futures), ntree)],
            priority=priority,
        )
    if status:
        from dask.distributed import progress
        # FIXME: fancy widget doesn't appear, have to live with boring pbar
        progress(futures, multi=True, notebook=False)
    accumulator += _maybe_decompress(futures.pop().result())
    return accumulator
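The while-loop performs a tree reduction: each round groups the outstanding futures into chunks of treereduction and maps the reducer over the chunks, so the count shrinks by roughly a factor of ntree per round (for example 400 → 20 → 1 with the default of 20). The chunking expression on a plain list, for illustration only:

ntree = 20
futures = list(range(400))  # stand-ins for dask futures
while len(futures) > 1:
    chunks = [futures[i:i + ntree] for i in range(0, len(futures), ntree)]
    futures = chunks            # in the real code, each chunk becomes one reduced future
    print(len(futures))         # prints 20, then 1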
Example #10
def process_futures(futures, encoding):
    index = 0
    while index < len(futures):
        future = futures[index]
        if not future.done():
            index = index + 1
            continue

        futures.pop(index)
        exception = future.exception()
        if exception:
            LOGGER.error(str(exception))
            continue

        parsed_line = future.result()
        if not parsed_line:
            continue
        try:
            if isinstance(parsed_line, unicode):
                parsed_line = parsed_line.encode(encoding)
            sys.stdout.write(parsed_line)
        except Exception as ex:
            LOGGER.error(str(ex))
Example #11
def scrap_all_comments(comments_url, max_workers=128):
    comments_url_iterator = iter(comments_url)
    pbar = tqdm(total=len(comments_url))
    # Use the max_workers parameter rather than a hard-coded pool size.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:

        futures = {}
        for book_url, comment_url in itertools.islice(
            comments_url_iterator, max_workers
        ):
            futures_executor = executor.submit(scrap_comments, comment_url=comment_url)
            futures.update({futures_executor: book_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                book_url = futures[future]
                futures.pop(future)
                try:
                    comments = future.result()
                except Exception as exc:
                    tqdm.write(f"{book_url} generated an exception: {exc}")
                else:
                    with DimnaDatabase(db_path, logger) as db:
                        db.update_page_visit_status(
                            base_url, book_url, True,
                        )
                        db.insert_all_rating(base_url, comments)
            for book_url, comment_url in itertools.islice(
                comments_url_iterator, len(done)
            ):
                futures_executor = executor.submit(
                    scrap_comments, comment_url=comment_url
                )
                futures.update({futures_executor: book_url})
    pbar.close()
Example #12
    def _map_gen(self,
                 futures: List[ConcurrentFuture],
                 end_time=None) -> Generator:
        """The generator that ``map`` return when ``awaitable`` is False."""

        try:
            while futures:
                future = futures.pop(0)
                if end_time is not None:
                    yield from future.result(end_time - time.time())
                else:
                    yield from future.result()
        # Finally clause for generator exit and timeout.
        finally:
            future.cancel()  # This future may have been done.
            for future in futures:
                future.cancel()
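The generator yields results in submission order and, thanks to the finally clause, cancels any futures that have not started yet when the caller stops iterating early (or a timeout expires). A standalone sketch of the same behaviour, without the surrounding class:

import concurrent.futures
import time

def map_gen(futures, end_time=None):
    try:
        while futures:
            future = futures.pop(0)
            if end_time is not None:
                yield from future.result(end_time - time.time())
            else:
                yield from future.result()
    finally:
        future.cancel()       # the current future is usually already done
        for future in futures:
            future.cancel()   # futures that never started are cancelled

def work(x):
    time.sleep(0.1)
    return [x, x * 10]        # each task returns an iterable of results

with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(work, x) for x in range(5)]
    for value in map_gen(futures):
        print(value)
        if value == 10:       # stopping early triggers the finally clause
            break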
Example #13
def process_urls_parallel(analysis_urls, script_file, container_timeout,
                          max_containers):
    futures = {}
    processed_url_ids = set()
    urls = analysis_urls.copy()
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=max_containers) as executor:
        while len(urls) > 0:
            ## Submit jobs to container ##
            for i in range(min(len(urls), max_containers)):
                id = next(iter(urls))  # dict views are not indexable in Python 3
                itm = urls.pop(id)
                url = itm['url']
                visit_count = itm['count']
                if i != 0 and i % 5 == 0:
                    time.sleep(200)
                if visit_count == 0:
                    ## initiates docker container for the first time
                    futures[executor.submit(initiate_container, url, str(id),
                                            script_file, visit_count,
                                            container_timeout)] = (str(id),
                                                                   visit_count)
                else:
                    ## Resumes docker container and waits for notifications
                    futures[executor.submit(resume_container, url, str(id),
                                            script_file, visit_count,
                                            container_timeout)] = (str(id),
                                                                   visit_count)

            try:
                ##  Keep docker container active for specific duration and stop the containe and export data
                for future in concurrent.futures.as_completed(
                        futures, timeout=container_timeout):
                    id, v_count = futures.pop(future)
                    try:
                        logging.info(get_time() + 'Container_' + str(id) +
                                     ': Completed successfully!!')
                    except concurrent.futures.TimeoutError as ex:
                        logging.info(get_time() + 'Container_' + str(id) +
                                     ': Timeout occurred!!')
                    except Exception as exc:
                        logging.info(get_time() + 'Container_' + str(id) +
                                     ': Exception ')
                        logging.info(exc)

                    res = export_container(id, v_count)
                    stop_container(id)
                    if res:
                        processed_url_ids.add(id)
            except Exception as e:
                ##  Stop the containers that didn't complete before timeout and export data
                # Iterate over a copy: popping from the dict while iterating
                # its keys view would raise a RuntimeError.
                for future in list(futures):
                    id, v_count = futures.pop(future)
                    try:
                        logging.info(get_time() + 'Container_' + str(id) +
                                     ': Timeout occurred!!')
                    except concurrent.futures.TimeoutError as ex:
                        logging.info(get_time() + 'Container_' + str(id) +
                                     ': Timeout occurred!!')
                    except Exception as exc:
                        logging.info(get_time() + 'Container_' + str(id) +
                                     ': Exception ')
                        logging.info(exc)

                    res = export_container(id, v_count)
                    stop_container(id)
                    if res:
                        processed_url_ids.add(id)
    return processed_url_ids
Example #14
        def generate(
            futures: List[concurrent.futures.Future],
        ) -> Generator[PushResult, None, None]:
            nonlocal reretrieve
            num_cached = 0
            num_pushes = len(pushes)

            for _ in tqdm(range(num_pushes)):
                push = pushes.pop(0)
                cached = futures.pop(0).result()

                semaphore.release()

                # Regenerating a large amount of data when we update the mozci regression detection
                # algorithm is currently pretty slow, so we only regenerate a subset of pushes whenever we
                # run.
                if cached:
                    value, mozci_version = cached

                    # Regenerate results which were generated with an older version of mozci.
                    if reretrieve > 0 and mozci_version != MOZCI_VERSION:
                        cached = None
                        reretrieve -= 1

                    # Regenerate results which don't contain the fix revision.
                    elif len(value) != 5:
                        cached = None

                if cached:
                    num_cached += 1
                    value, mozci_version = cached
                    assert len(value) == 5
                    yield value
                else:
                    logger.info(
                        f"Analyzing {push.rev} at the {granularity} level...")

                    key = cache_key(push)

                    try:
                        if granularity == "label":
                            runnables = push.task_labels
                        elif granularity == "group":
                            runnables = push.group_summaries.keys()
                        elif granularity == "config_group":
                            runnables = push.config_group_summaries.keys()

                        value = (
                            tuple(push.revs),
                            push.backedoutby or push.bustage_fixed_by,
                            tuple(runnables),
                            tuple(push.get_possible_regressions(granularity)),
                            tuple(push.get_likely_regressions(granularity)),
                        )
                        mozci.config.cache.put(
                            key,
                            (value, MOZCI_VERSION),
                            mozci.config["cache"]["retention"],
                        )
                        assert len(value) == 5
                        yield value
                    except mozci.errors.MissingDataError:
                        logger.warning(
                            f"Tasks for push {push.rev} can't be found on ActiveData"
                        )
                    except Exception:
                        traceback.print_exc()

            logger.info(
                f"{num_cached} pushes were already cached out of {num_pushes}")
Example #15
def callback(f):
    try:
        print(f.result())
        futures.pop(data)
    except:
        print("Please handle {} for {}.".format(f.exception(), data))
Example #16
import sys
import time
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor

import av

# convert() is defined elsewhere in the original script (presumably it writes
# one frame to disk in the requested format).
start = time.time()  # elapsed-time reference used in the final print

video = av.open(sys.argv[1])
# stream.type is a plain str ('video') in PyAV under Python 3
stream = next(s for s in video.streams if s.type == 'video')
frame_count = 0

with ThreadPoolExecutor(8) as executor:
    
    futures = []
    for packet in video.demux(stream):
        for frame in packet.decode():
            
            frame_count += 1
            
            # reformat is not very thread happy
            new_frame = frame.reformat(1920, 1080, 'rgb48le')
            futures.append(executor.submit(convert, new_frame, frame_count, 'dpx'))

            #convert(new_frame, frame_count)
            
            while len(futures) > 8 * 4:
                f = futures.pop(0)
                f.result()
    
        if frame_count > 100:
            break
        
    for f in concurrent.futures.as_completed(futures):
        r = f.result()
        
print "completed in %i secs" % (time.time() - start)
Example #17
        def generate(
            futures: List[concurrent.futures.Future],
        ) -> Generator[PushResult, None, None]:
            num_cached = 0
            num_pushes = len(pushes)

            # Regenerating a large amount of data when we update the mozci regression detection
            # algorithm is currently pretty slow, so we only regenerate a subset of pushes whenever we
            # run.
            to_regenerate = int(os.environ.get("OLD_RESULTS_TO_REGENERATE", 0))

            for _ in tqdm(range(num_pushes)):
                push = pushes.pop(0)
                cached = futures.pop(0).result()

                semaphore.release()

                if cached and to_regenerate > 0:
                    value, mozci_version = cached

                    # Regenerate results which were generated when we were not cleaning
                    # up WPT groups.
                    if granularity == "group" and any(
                        runnable.startswith("/") for runnable in value[1]
                    ):
                        cached = None
                        to_regenerate -= 1

                    # Regenerate results which were generated when we didn't get a correct
                    # configuration for test-verify tasks.
                    elif granularity == "config_group" and any(
                        "test-verify" in runnable[0] for runnable in value[1]
                    ):
                        cached = None
                        to_regenerate -= 1

                    # Regenerate results which were generated with an older version of mozci.
                    elif mozci_version != MOZCI_VERSION:
                        cached = None
                        to_regenerate -= 1

                if cached:
                    num_cached += 1
                    value, mozci_version = cached
                    yield value
                else:
                    logger.info(f"Analyzing {push.rev} at the {granularity} level...")

                    key = cache_key(push)

                    try:
                        if granularity == "label":
                            runnables = push.task_labels
                        elif granularity == "group":
                            runnables = push.group_summaries.keys()
                        elif granularity == "config_group":
                            runnables = push.config_group_summaries.keys()

                        value = (
                            push.revs,
                            tuple(runnables),
                            tuple(push.get_possible_regressions(granularity)),
                            tuple(push.get_likely_regressions(granularity)),
                        )
                        adr.config.cache.put(
                            key,
                            (value, MOZCI_VERSION),
                            adr.config["cache"]["retention"],
                        )
                        yield value
                    except adr.errors.MissingDataError:
                        logger.warning(
                            f"Tasks for push {push.rev} can't be found on ActiveData"
                        )
                    except Exception:
                        traceback.print_exc()

            logger.info(f"{num_cached} pushes were already cached out of {num_pushes}")
Example #18
    async def execute(self):
        if not self._bulk_mode:
            raise Exception("No Bulk request started")
        if len(self._bulk_data) == 0:
            return []

        data = self._bulk_data[:]
        self._bulk_data = []
        futures = self._bulk_futures.copy()
        self._bulk_futures = {}
        req_start = time.time()

        for fn in self.middleware.before_request:
            res = fn(data)
            if asyncio.iscoroutine(res):
                res = await res
            if res is not None:
                data = res

        retries = 0
        while True:
            try:
                resp = await self._httpclient.fetch(
                    self._url,
                    method="POST",
                    body=data,
                    request_timeout=60.0 # higher request timeout than other operations
                )
            except concurrent.futures.CancelledError:
                raise
            except Exception as e:
                if self.should_retry and isinstance(e, HTTPError) and (e.status == 599 or e.status == 502):
                    # always retry after 599
                    pass
                elif not self.should_retry or time.time() - req_start >= self._request_timeout:
                    # give up after the request timeout
                    raise
                if retries == 0:
                    logger = self.log.exception
                else:
                    logger = self.log.error
                logger("Error in JsonRPCClient.execute: retry {}".format(retries))
                retries += 1
                await asyncio.sleep(random.random())
                continue
            break

        rvals = await resp.json()
        for fn in self.middleware.after_request:
            res = fn(data, rvals)
            if asyncio.iscoroutine(res):
                res = await res
            if res is not None:
                rvals = res

        results = []
        for rval in rvals:
            if 'id' not in rval:
                continue
            future, result_processor = futures.pop(rval['id'], (None, None))
            if future is None:
                self.log.warning("Got unexpected id in jsonrpc bulk response")
                continue
            if "error" in rval:
                future.set_exception(JsonRPCError(rval['id'], rval['error']['code'], rval['error']['message'], rval['error']['data'] if 'data' in rval['error'] else None))
                result = None
            else:
                if result_processor:
                    result = result_processor(rval['result'])
                else:
                    result = rval['result']
                future.set_result(result)
            results.append(result)

        if len(futures):
            self.log.warning("Found some unprocessed requests in bulk jsonrpc request")
            for future, result_processor in futures.values():  # futures maps request id -> (future, result_processor)
                future.set_exception(Exception("Unexpectedly missing result"))

        return results
Example #19
def summary_tables(name, tables_only=False):
    """
    Creates the summary tables in a schema.

    :param str name: the schema's name
    :param boolean tables_only: whether to create SQL tables instead of SQL views
    """
    logger = logging.getLogger('ocdskingfisher.summarize.summary-tables')

    start = time()

    files = {
        directory: sql_files(directory, tables_only=tables_only)
        for directory in ('initial', 'middle', 'final')
    }
    graph = dependency_graph(files['middle'])

    def run(directory):
        """
        Runs the files in a directory in sequence.

        :param str directory: a sub-directory containing SQL files
        """
        for identifier, content in files[directory].items():
            _run_file(name, identifier, content)

    def submit(identifier):
        """
        If a file's dependencies are met, removes it from the dependency graph and submits it.

        :param str identifier: the identifier of a SQL file
        """
        if not graph[identifier]:
            graph.pop(identifier)
            futures[executor.submit(_run_file, name, identifier,
                                    files['middle'][identifier])] = identifier

    # The initial files are fast, and don't need multiprocessing.
    run('initial')

    futures = {}
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # Submit files whose dependencies are met.
        for identifier in list(graph):
            submit(identifier)

        # The for-loop terminates after its given futures, so it needs to start again with new futures.
        while futures:
            for future in concurrent.futures.as_completed(futures):
                future.result()
                done = futures.pop(future)

                # Update dependencies, and submit files whose dependencies are met.
                for identifier in list(graph):
                    graph[identifier].discard(done)
                    submit(identifier)

    # The final files are fast, and can also deadlock.
    run('final')

    logger.info('Total time: %ss', time() - start)
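The submit/discard dance above is a topological execution of the middle SQL files: a file is submitted as soon as its dependency set becomes empty, and each completion removes that identifier from every remaining dependency set. The same idea on a toy graph, using a thread pool here only so the sketch runs without a __main__ guard:

import concurrent.futures

# identifier -> identifiers it still waits on (a made-up dependency graph)
graph = {"a": set(), "b": {"a"}, "c": {"a"}, "d": {"b", "c"}}

def run_file(identifier):
    return identifier  # stand-in for _run_file

futures = {}
with concurrent.futures.ThreadPoolExecutor() as executor:
    def submit(identifier):
        if not graph[identifier]:
            graph.pop(identifier)
            futures[executor.submit(run_file, identifier)] = identifier

    for identifier in list(graph):
        submit(identifier)

    # as_completed only covers the futures it was given, so restart it
    # whenever newly eligible files have been submitted.
    while futures:
        for future in concurrent.futures.as_completed(list(futures)):
            done = futures.pop(future)
            print("finished", done)
            for identifier in list(graph):
                graph[identifier].discard(done)
                submit(identifier)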
Example #20
    mongo_download = MongoDownload(mongo_obj)
    tournaments_scraped = mongo_download.getTournamentsScraped()
    filter_tournaments = tournament_df[~tournament_df[['Name', 'Year']].apply(tuple, 1).isin(tournaments_scraped)]
    tournaments = filter_tournaments.apply(lambda row: TournamentRun(row[0], row[1], mongo_obj, main_logger),
                                           axis=1).tolist()
    iter_tournaments = iter(tournaments)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_drivers) as executor:
        # Only schedule max_drivers amount of futures to start
        futures = {
            executor.submit(tournament.runTournament, None, True): tournament
            for tournament in itertools.islice(iter_tournaments, max_drivers)
        }

        while futures:
            # Wait for the next future to complete.
            finished, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )

            for future in finished:
                # get the completed tournament
                completed_tournament = futures.pop(future)
                main_logger.info('{}'.format(future.result()))

            for tournament in itertools.islice(iter_tournaments, len(finished)):
                future = executor.submit(tournament.runTournament, None, True)
                futures[future] = tournament

    failed_scrape_df = pd.DataFrame(columns=['Name', 'Year'], data=tournaments[0].failed_scrape_list)
    failed_scrape_df.to_csv('tournaments/FailedTournamentList.csv', index=False, header=True)