Example #1
def futures_executor(items,
                     function,
                     accumulator,
                     workers=1,
                     status=True,
                     unit='items',
                     desc='Processing',
                     **kwargs):
    """Execute using multiple local cores using python futures

    Parameters
    ----------
        items : list
            List of input arguments
        function : function
            A function to be called on each input, which returns an accumulator instance
        accumulator : AccumulatorABC
            An accumulator to collect the output of the function
        workers : int
            Number of parallel processes for futures
        status : bool
            If true (default), enable progress bar
        unit : str
            Label of progress bar unit
        desc : str
            Label of progress bar description
    """
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=workers) as executor:
        futures = set()
        futures.update(
            executor.submit(function, item, **kwargs) for item in items)
        futures_handler(futures, accumulator, status, unit, desc)
    return accumulator
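
futures_handler is not defined in this snippet. Judging from Example #3 below, which inlines the same logic, it is expected to poll the futures, fold each result into the accumulator, and drive a progress bar. A minimal sketch under those assumptions (the signature is inferred from the call site; it assumes tqdm and time are imported, and that the accumulator's += mutates it in place, as an AccumulatorABC does):

def futures_handler(futures, accumulator, status, unit, desc):
    # Poll until every submitted future has finished, folding each result
    # into the accumulator and advancing a progress bar along the way.
    with tqdm(disable=not status, unit=unit, total=len(futures), desc=desc) as pbar:
        while futures:
            finished = set(job for job in futures if job.done())
            for job in finished:
                accumulator += job.result()
                pbar.update(1)
            futures -= finished
            time.sleep(0.5)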
Example #2
def get_all_comments(restaurants_url, pages_tracker={}, max_workers=64):

    restaurants_url_to_do_iterator = iter(restaurants_url)
    pages_comments = []
    pbar = tqdm(total=len(restaurants_url))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:

        futures = {}
        for restaurant_url, page_number in itertools.islice(
            restaurants_url_to_do_iterator, max_workers
        ):

            futures_executor = executor.submit(
                get_page_comments, restaurant_url, page_number
            )
            futures.update({futures_executor: restaurant_url})
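        # Drain futures as they complete and immediately submit new pages,
        # keeping at most max_workers requests in flight at any time.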
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                restaurant_url = futures[future]
                futures.pop(future)

                try:
                    comments = future.result()
                except Exception as exc:
                    tqdm.write(f"{restaurant_url} generated an exception: {exc}")
                else:

                    if pages_tracker:
                        pages_tracker[restaurant_url][1] += 1
                        if (
                            pages_tracker[restaurant_url][1]
                            >= pages_tracker[restaurant_url][0]
                        ):
                            with DimnaDatabase(db_path, logger) as db:
                                db.update_page_visit_status(
                                    base_url, restaurant_url, True,
                                )

                    pages_comments.append(comments)

                    with DimnaDatabase(db_path, logger) as db:
                        for comment, rating in comments["comments"]:
                            db.insert_rating(
                                base_url, comment.replace("\x00", ""), rating
                            )

            for restaurant_url, page_number in itertools.islice(
                restaurants_url_to_do_iterator, len(done)
            ):
                futures_executor = executor.submit(
                    get_page_comments, restaurant_url, page_number
                )
                futures.update({futures_executor: restaurant_url})

    pbar.close()
    return pages_comments
Example #3
def futures_executor(items, function, accumulator, workers=2, status=True, unit='items', desc='Processing',
                     function_args={}):
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = set()
        try:
            futures.update(executor.submit(function, item, **function_args) for item in items)
            with tqdm(disable=not status, unit=unit, total=len(futures), desc=desc) as pbar:
                while len(futures) > 0:
                    finished = set(job for job in futures if job.done())
                    for job in finished:
                        accumulator += job.result()
                        pbar.update(1)
                    futures -= finished
                    del finished
                    time.sleep(1)
        except KeyboardInterrupt:
            for job in futures:
                job.cancel()
            if status:
                print("Received SIGINT, killed pending jobs.  Running jobs will continue to completion.")
                print("Running jobs:", sum(1 for j in futures if j.running()))
        except Exception:
            for job in futures:
                job.cancel()
            raise
    return accumulator
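
A minimal usage sketch for this self-contained variant (the names below are illustrative, and the same concurrent.futures, time, and tqdm imports as above are assumed): because results are folded in with +=, a plain integer works as the accumulator for a toy sum-of-squares job.

def square(x):
    # Top-level function so ProcessPoolExecutor can pickle it.
    return x * x

if __name__ == '__main__':
    total = futures_executor(items=list(range(100)),
                             function=square,
                             accumulator=0,
                             workers=4,
                             desc='Squaring')
    print(total)  # 328350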
Example #4
def scrap_all_comments(base_url, urls, max_workers=256):

    urls_to_do = [url for (_, url, is_visited) in urls if not is_visited]
    urls_to_do_iterator = iter(urls_to_do)

    pbar = tqdm(initial=len(urls) - len(urls_to_do), total=len(urls))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:

        futures = {}
        for url in itertools.islice(urls_to_do_iterator, max_workers):
            futures_executor = executor.submit(scrap_comments, url=url)
            futures.update({futures_executor: url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                url = futures[future]
                futures.pop(future)
                try:
                    comments = future.result()
                except Exception as exc:
                    tqdm.write(f"{url} generated an exception: {exc}")
                else:
                    with DimnaDatabase(db_path, logger) as db:
                        db.update_page_visit_status(
                            base_url, url, True,
                        )
                        if comments:
                            db.insert_all_rating(base_url, comments)
            for url in itertools.islice(urls_to_do_iterator, len(done)):
                futures_executor = executor.submit(scrap_comments, url=url)
                futures.update({futures_executor: url})
    pbar.close()
Example #5
def futuresum(tmp_arr):
    while np.size(tmp_arr) > 1:
        chunk_sum = []
        chunk_tmp_arr = np.array_split(tmp_arr, int(np.size(tmp_arr) / 2))
        if len(chunk_tmp_arr) > 1:
            with concurrent.futures.ProcessPoolExecutor(
                    max_workers=16) as executor:
                futures = set()
                futures.update(
                    executor.submit(add, chunk_tmp_arr[i])
                    for i in range(0, len(chunk_tmp_arr)))
                if (len(futures) == 0): continue
                try:
                    total = len(futures)
                    processed = 0
                    while len(futures) > 0:
                        finished = set(job for job in futures if job.done())
                        for job in finished:
                            chunk_i = job.result()
                            chunk_sum.append(chunk_i)
                        futures -= finished
                    del finished
                except KeyboardInterrupt:
                    print("Ok quitter")
                    for job in futures:
                        job.cancel()
                except:
                    for job in futures:
                        job.cancel()
                    raise
        else:
            chunk_sum.append(add(chunk_tmp_arr[0]))
        tmp_arr = np.array(chunk_sum)
        print(tmp_arr)
    return tmp_arr
Example #6
def find_all_doctors_url(base_url, cities_url, max_workers=128):
    cities_url_iterator = iter(cities_url)
    pbar = tqdm(total=len(cities_url))
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=max_workers) as executor:

        futures = {}
        for city_url in itertools.islice(cities_url_iterator, max_workers):
            futures_executor = executor.submit(find_doctors_url,
                                               base_url=base_url,
                                               city_url=city_url)
            futures.update({futures_executor: city_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED)
            for future in done:
                pbar.update(1)
                city_url = futures[future]
                futures.pop(future)
                try:
                    doctors_url = future.result()
                except Exception as exc:
                    tqdm.write(f"{city_url} generated an exception: {exc}")
                else:
                    with DimnaDatabase(db_path, logger) as db:
                        db.insert_all_pages_url(base_url, doctors_url)
            for city_url in itertools.islice(cities_url_iterator, len(done)):
                futures_executor = executor.submit(find_doctors_url,
                                                   base_url=base_url,
                                                   city_url=city_url)
                futures.update({futures_executor: city_url})
    pbar.close()
Example #7
def futures_executor(items, function, accumulator, workers=1, status=True, unit='items', desc='Processing',
                     **kwargs):
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = set()
        futures.update(executor.submit(function, item, **kwargs) for item in items)
        futures_handler(futures, accumulator, status, unit, desc)
    return accumulator
Example #8
def batched_pool_runner(f, iterable, pool, batch_size):
    it = iter(iterable)
    # Submit the first batch of tasks.
    futures = set(pool.submit(f, x) for x in islice(it, batch_size))
    while futures:
        done, futures = concurrent.futures.wait(
            futures, return_when=concurrent.futures.FIRST_COMPLETED)
        # Replenish submitted tasks up to the number that completed.
        futures.update(pool.submit(f, x) for x in islice(it, len(done)))
        for d in done:
            yield d
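
A hedged usage sketch for the generator above (the worker function and inputs are illustrative; batched_pool_runner itself relies on islice from itertools and concurrent.futures being imported): it keeps at most batch_size tasks outstanding and yields each future as soon as it completes.

import concurrent.futures

def slow_double(x):
    # Stand-in for real per-item work.
    return 2 * x

with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
    for future in batched_pool_runner(slow_double, range(20), pool, batch_size=8):
        print(future.result())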
Example #9
def write_data(dataset, topicname):
    for eachline in dataset:
        msg = str(eachline)
        futures.update({msg: None})
        # When you publish a message, the client returns a future.
        future = publisher.publish(
            topicname,
            msg.encode("utf-8")  # data must be a bytestring.
        )
        futures[msg] = future
        # Publish failures shall be handled in the callback function.
        future.add_done_callback(get_callback(future, msg))
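
publisher, the module-level futures dict, and get_callback are defined elsewhere in the original file. A minimal sketch of the callback factory, following the common Pub/Sub publish-with-error-handling pattern (the body below is an assumption, not the author's code), might look like:

def get_callback(future, msg):
    def callback(completed_future):
        try:
            # result() re-raises any publish error; on success it is the message ID.
            completed_future.result()
            futures.pop(msg)
        except Exception as exc:
            print(f"Failed to publish {msg}: {exc}")
    return callback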
Example #10
def find_all_comments_pages(pages_url, max_workers=128):
    book_url_to_do = [
        book_url for (_, book_url, is_visited) in pages_url if not is_visited
    ]
    book_url_to_do_iterator = iter(book_url_to_do)
    pbar = tqdm(initial=len(pages_url) - len(book_url_to_do), total=len(pages_url))
    comments_url = list()
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        for book_url in itertools.islice(book_url_to_do_iterator, max_workers):
            book_id, book_name = book_url.split("/")[-2:]
            first_comment_url = f"{comments_base_url}/{book_id}/{book_name}.json"
            futures_executor = executor.submit(
                find_number_of_comments, comment_url=first_comment_url
            )
            futures.update({futures_executor: book_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                book_url = futures[future]
                futures.pop(future)
                book_id, book_name = book_url.split("/")[-2:]
                try:
                    num_pages = future.result()
                except Exception as exc:
                    tqdm.write(f"{book_url} generated an exception: {exc}")
                else:
                    if num_pages:
                        for page in range(1, num_pages + 1):
                            comment_url = f"{comments_base_url}/{book_id}/{book_name}.json?p={page}"
                            comments_url.append([book_url, comment_url])
                    else:
                        with DimnaDatabase(db_path, logger) as db:
                            db.update_page_visit_status(
                                base_url, book_url, True,
                            )
            for book_url in itertools.islice(book_url_to_do_iterator, len(done)):
                book_id, book_name = book_url.split("/")[-2:]
                first_comment_url = f"{comments_base_url}/{book_id}/{book_name}.json"
                futures_executor = executor.submit(
                    find_number_of_comments, comment_url=first_comment_url
                )
                futures.update({futures_executor: book_url})
    pbar.close()
    return comments_url
Example #11
def render(modelname):

    with open('data/' + modelname + '.model', 'rb') as fin:  # pickle files must be opened in binary mode
        model = pickle.load(fin)

    model_arr = []
    for ch in model:
        print('generating model for channel', ch.name)
        small_model = rl.Model(ch.name.encode("ascii"))
        small_model.addChannel(model[ch.name])
        model_arr.append(small_model)
    print(model_arr)
    print('Rendering')

    #for i in range(0,len(model_arr)):
    #    futurerender(model_arr[i], modelname)

    with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
        futures = set()
        futures.update(
            executor.submit(futurerender, model_arr[i], modelname)
            for i in range(0, len(model_arr)))
        try:
            total = len(futures)
            processed = 0
            while len(futures) > 0:
                finished = set(job for job in futures if job.done())
                for job in finished:
                    job.result()
                futures -= finished
            del finished
        except KeyboardInterrupt:
            print("Ok quitter")
            for job in futures:
                job.cancel()
        except:
            for job in futures:
                job.cancel()
            raise
Example #12
def read_write_file(bucket_name, filename, topicname):
    bucket = storage_client.get_bucket(bucket_name)  # look up the bucket
    blob = bucket.get_blob(filename)  # get the file's blob
    data = ndjson.loads(blob.download_as_string())  # download and parse newline-delimited JSON
    for eachline in data:
        df = pandas.DataFrame(eachline["Itinerary"].split("-"))
        df.columns = ['AirportCode']
        df_inner = pandas.merge(df, dataframe, on='AirportCode', how='inner')
        x = df_inner['CountryName'].unique()
        trip_type = 'International' if x.size > 1 else 'Domestic'
        y = df_inner.to_dict('records')
        eachline['CountryList'] = y
        eachline['Trip_type'] = trip_type
        msg = str(eachline)
        futures.update({msg: None})
        # When you publish a message, the client returns a future.
        future = publisher.publish(
            topicname,
            msg.encode("utf-8")  # data must be a bytestring.
        )
        futures[msg] = future
        # Publish failures shall be handled in the callback function.
        future.add_done_callback(get_callback(future, msg))
Example #13
def scrap_all_comments(comments_url, max_workers=128):
    comments_url_iterator = iter(comments_url)
    pbar = tqdm(total=len(comments_url))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:

        futures = {}
        for book_url, comment_url in itertools.islice(
            comments_url_iterator, max_workers
        ):
            futures_executor = executor.submit(scrap_comments, comment_url=comment_url)
            futures.update({futures_executor: book_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                book_url = futures[future]
                futures.pop(future)
                try:
                    comments = future.result()
                except Exception as exc:
                    tqdm.write(f"{book_url} generated an exception: {exc}")
                else:
                    with DimnaDatabase(db_path, logger) as db:
                        db.update_page_visit_status(
                            base_url, book_url, True,
                        )
                        db.insert_all_rating(base_url, comments)
            for book_url, comment_url in itertools.islice(
                comments_url_iterator, len(done)
            ):
                futures_executor = executor.submit(
                    scrap_comments, comment_url=comment_url
                )
                futures.update({futures_executor: book_url})
    pbar.close()
Example #14
    def run(self):
        """
        Starts the load test by instantiating required number of SignupUser and MultiUser instances.
        It increases the number of user instances according to the total_qps_increase_rate after every minute.
        It collects and returns the results returned by all user instances after load test is completed.
        """
        initial_signup_instances_count = self.get_initial_signup_instances_count()
        initial_multi_instances_count = self.get_initial_multi_instances_count()
        signup_qps_increase_rate = self.get_signup_qps_increase_rate()
        multi_qps_increase_rate = self.get_multi_qps_increase_rate()

        # end_time is 1 minute more than the duration for getting responses of requests sent in the last minute
        end_time = datetime.now() + timedelta(minutes=self.duration + 1)
        users.User.end_time = end_time
        users.MultiUser.user_details = LoadTest.all_users
        users.MultiUser.chat_details = LoadTest.all_chats
        users.MultiUser.distribution = self.get_api_distribution_without_signup()
        users.User.total_qps = 0

        results = []
        total_iterations = self.duration
        completed_iterations = 0

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_qps) as executor:
            signup_instances = get_signup_user_instances(initial_signup_instances_count)
            multi_instances = get_multi_user_instances(initial_multi_instances_count, self.all_users_index)
            self.all_users_index += initial_multi_instances_count
            futures = {executor.submit(_.simulate) for _ in signup_instances}
            futures.update({executor.submit(_.simulate) for _ in multi_instances})
            while completed_iterations < total_iterations:
                time.sleep(60)
                signup_instances = get_signup_user_instances(signup_qps_increase_rate)
                multi_instances = get_multi_user_instances(multi_qps_increase_rate, self.all_users_index)
                self.all_users_index += multi_qps_increase_rate
                futures.update({executor.submit(_.simulate) for _ in signup_instances})
                futures.update({executor.submit(_.simulate) for _ in multi_instances})
                completed_iterations += 1
            for future in concurrent.futures.as_completed(futures):
                results.extend(future.result())
        return results
Example #15
    "Bu2KJpsi2KMuMu_inclusive": "{}/v1_0/files_BuToJpsiK_SoftQCDnonD.txt".format(skim_directory),
}


nworkers = 1
#fileslice = slice(None, 5)
fileslice = slice(None)
nevents = {}
#with concurrent.futures.ThreadPoolExecutor(max_workers=nworkers) as executor:
with concurrent.futures.ProcessPoolExecutor(max_workers=nworkers) as executor:
    futures = set()
    for dataset, filelistpath in in_txt.items():
        with open(filelistpath) as filelist:
            files = [x.strip() for x in filelist.readlines()]
        print(files)
        futures.update(executor.submit(process_file, dataset, f) for f in files)
        nevents[dataset] = 0
    try:
        total = len(futures)
        processed = 0
        while len(futures) > 0:
            finished = set(job for job in futures if job.done())
            for job in finished:
                dataset, nentries, rhistograms = job.result()
                nevents[dataset] += nentries
                for k in rhistograms.keys():
                    histograms[k] += rhistograms[k]
                processed += 1
                print("Processing: done with % 4d / % 4d files" % (processed, total))
            futures -= finished
        del finished
Example #16
    if run.size == 0: return dataset, lumi_tools.LumiList()
    lumilist = lumi_tools.LumiList(run, lumi)
    return dataset, lumilist


dataset_lumi = {}
nworkers = 12

with concurrent.futures.ProcessPoolExecutor(max_workers=nworkers) as executor:
    futures = set()
    print(samples.keys())
    for dataset, files in samples.items():
        splitFiles = slice_it(files['files'], slices)
        for iL, iList in enumerate(splitFiles):
            futures.update(
                executor.submit(get_lumilist, dataset, file, files['treename'])
                for file in iList)
    try:
        total = len(futures)
        processed = 0
        while len(futures) > 0:
            finished = set(job for job in futures if job.done())
            for job in finished:
                dataset, accumulator = job.result()
                if dataset in dataset_lumi:
                    dataset_lumi[dataset] += accumulator
                else:
                    dataset_lumi[dataset] = accumulator
                processed += 1
                if processed % 10 == 0:
                    print("Processing: done with % 4d / % 4d files" %
Example #17
def scan_profile(url, org, fid, timeout=None):
    """
    Report permutations of OFX version/prettyprint/unclosed_elements that
    successfully download OFX profile from server.

    Returns a pair of (OFXv1 results, OFXv2 results), each type(dict).
    dict values provide ``ofxget`` configs that will work to connect.
    """
    if timeout is None:
        timeout = 5

    ofxv1 = [102, 103, 151, 160]
    ofxv2 = [200, 201, 202, 203, 210, 211, 220]

    futures = {}
    client = OFXClient(url, org, fid)
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        for prettyprint in (False, True):
            for close_elements in (False, True):
                futures.update({
                    executor.submit(client.request_profile,
                                    version=version,
                                    prettyprint=prettyprint,
                                    close_elements=close_elements,
                                    timeout=timeout):
                    (version, prettyprint, close_elements)
                    for version in ofxv1
                })

            futures.update({
                executor.submit(client.request_profile,
                                version=version,
                                prettyprint=prettyprint,
                                close_elements=True,
                                timeout=timeout): (version, prettyprint, True)
                for version in ofxv2
            })

    working = defaultdict(list)

    for future in concurrent.futures.as_completed(futures):
        try:
            response = future.result()
        except (urllib.error.URLError, urllib.error.HTTPError, ConnectionError,
                OSError) as exc:
            cancelled = future.cancel()
            continue
        else:
            (version, prettyprint, close_elements) = futures[future]
            working[version].append((prettyprint, close_elements))

    def collate_results(results):
        results = list(results)
        if not results:
            return [], []
        versions, formats = zip(*results)

        # Assumption: the same formatting requirements apply to all
        # sub-versions (e.g. 1.0.2 and 1.0.3, or 2.0.3 and 2.2.0).
        # If a (pretty, close_elements) pair succeeds on most sub-versions
        # but fails on a few, we'll chalk it up to network transmission
        # errors and ignore it.
        #
        # Translation: just pick the longest sequence of successful
        # formats and assume it applies to the whole version.
        formats = max(formats, key=len)
        formats.sort()
        formats = [
            OrderedDict([("pretty", format[0]),
                         ("unclosed_elements", not format[1])])
            for format in formats
        ]
        return sorted(list(versions)), formats

    v2, v1 = utils.partition(lambda pair: pair[0] < 200, working.items())
    v1_versions, v1_formats = collate_results(v1)
    v2_versions, v2_formats = collate_results(v2)

    # V2 always has closing tags for elements; just report prettyprint
    for format in v2_formats:
        del format["unclosed_elements"]

    return json.dumps((OrderedDict([("versions", v1_versions),
                                    ("formats", v1_formats)]),
                       OrderedDict([("versions", v2_versions),
                                    ("formats", v2_formats)])))
Example #18
def process_recursive(directories,
                      *,
                      album_gain=False,
                      opus_output_gain=False,
                      mtime_second_offset=None,
                      skip_tagged=False,
                      thread_count=None,
                      ffmpeg_path=None,
                      dry_run=False,
                      report=False):
    """ Analyze and tag all audio files recursively found in input directories. """
    error_count = 0

    # walk directories
    albums_filepaths = []
    walk_stats = collections.OrderedDict(((k, 0) for k in ("files", "dirs")))
    with dynamic_tqdm(desc="Analyzing directories",
                      unit=" dir",
                      postfix=walk_stats,
                      leave=False) as progress:
        for input_directory in directories:
            for root_dir, subdirs, filepaths in os.walk(input_directory,
                                                        followlinks=False):
                audio_filepaths = tuple(
                    map(functools.partial(os.path.join, root_dir),
                        filter(is_audio_filepath, filepaths)))
                if audio_filepaths:
                    albums_filepaths.append(audio_filepaths)

                if progress is not None:
                    walk_stats["files"] += len(filepaths)
                    walk_stats["dirs"] += 1
                    progress.set_postfix(walk_stats, refresh=False)
                    progress.update(1)

    # get optimal thread count
    if thread_count is None:
        thread_count = OPTIMAL_THREAD_COUNT

    executor = concurrent.futures.ThreadPoolExecutor(max_workers=thread_count)
    start_evt = threading.Event()
    futures = {}

    with dynamic_tqdm(total=len(albums_filepaths),
                      desc="Building work queue",
                      unit=" albums",
                      leave=False) as progress:
        # analysis futures
        for album_filepaths in albums_filepaths:
            dir_futures = scan(album_filepaths,
                               album_gain=album_gain,
                               skip_tagged=skip_tagged,
                               ffmpeg_path=ffmpeg_path,
                               executor=executor,
                               start_evt=start_evt)
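            # Re-key each future to (its sibling futures from the same directory, its key)
            # so that tagging below only runs once the whole album has been scanned.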
            dir_futures = {
                k: (tuple(f for f in dir_futures.keys() if f is not k), v)
                for k, v in dir_futures.items()
            }
            futures.update(dir_futures)

            if progress is not None:
                progress.update(1)

    with dynamic_tqdm(total=sum(map(len, albums_filepaths)) +
                      int(album_gain) * len(albums_filepaths),
                      desc="Analyzing audio loudness",
                      unit=" files",
                      leave=False,
                      smoothing=0) as progress:
        # get results
        start_evt.set()
        pending_futures = futures
        while futures:
            done_futures, pending_futures = concurrent.futures.wait(
                pending_futures,
                return_when=concurrent.futures.FIRST_COMPLETED)
            to_del_futures = set()

            for done_future in done_futures:
                other_dir_futures, _ = futures[done_future]

                if progress is not None:
                    # update progress
                    progress.update(1)

                # ignore futures already processed
                if done_future in to_del_futures:
                    continue

                # only tag when the whole directory is scanned
                dir_futures = (done_future, ) + other_dir_futures
                if not all(f.done() for f in dir_futures):
                    continue

                # get album filepaths
                audio_filepaths = tuple(futures[f][1] for f in dir_futures
                                        if futures[f][1] != ALBUM_GAIN_KEY)

                # get analysis results for this directory
                r128_data = {}
                for dir_future in dir_futures:
                    key = futures[dir_future][1]
                    try:
                        result = dir_future.result()
                    except Exception as e:
                        if album_gain and (key == ALBUM_GAIN_KEY):
                            logger().warning(
                                "Failed to analyze files %s: %s %s" %
                                (", ".join(
                                    repr(audio_filepath)
                                    for audio_filepath in audio_filepaths),
                                 e.__class__.__qualname__, e))
                        else:
                            logger().warning(
                                "Failed to analyze file %r: %s %s" %
                                (key, e.__class__.__qualname__, e))
                        error_count += 1
                    else:
                        if result is not None:
                            r128_data[key] = result

                if report and audio_filepaths:
                    show_scan_report(
                        audio_filepaths,
                        os.path.dirname(audio_filepaths[0])
                        if album_gain else None, r128_data)

                if not dry_run:
                    # tag
                    try:
                        album_loudness, album_peak = r128_data[ALBUM_GAIN_KEY]
                    except KeyError:
                        album_loudness, album_peak = None, None
                    for audio_filepath in audio_filepaths:
                        try:
                            loudness, peak = r128_data[audio_filepath]
                        except KeyError:
                            if album_loudness is None:
                                # file was skipped
                                continue
                            else:
                                loudness, peak = None, None
                        try:
                            tag(audio_filepath,
                                loudness,
                                peak,
                                album_loudness=album_loudness,
                                album_peak=album_peak,
                                opus_output_gain=opus_output_gain,
                                mtime_second_offset=mtime_second_offset)
                        except Exception as e:
                            logger().error(
                                "Failed to tag file '%s': %s %s" %
                                (audio_filepath, e.__class__.__qualname__, e))
                            error_count += 1

                to_del_futures.add(done_future)
                for f in other_dir_futures:
                    to_del_futures.add(f)

            for to_del_future in to_del_futures:
                del futures[to_del_future]

    executor.shutdown(True)

    return error_count
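
A hedged usage sketch (the directory path is a placeholder): a dry run prints the scan report without writing any tags and returns the number of errors encountered.

errors = process_recursive(["/path/to/music"],
                           album_gain=True,
                           dry_run=True,
                           report=True)
print(f"{errors} error(s) during analysis")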
Example #19
    return val


nworkers = 22
fileslice = slice(None)
with concurrent.futures.ProcessPoolExecutor(max_workers=nworkers) as executor:
    futures = set()
    for dataset, info in datadef.items():
        if options.dataset and options.dataset not in dataset: continue
        for k, v in samples.items():
            if options.selection and options.selection not in k: continue
            for i in range(0, len(v)):
                if v[i] not in dataset: continue
                print(dataset)
                futures.update(
                    executor.submit(analysis, k, options.year,
                                    dataset_xs[dataset], dataset, file)
                    for file in info['files'][fileslice])
#                for file in info['files'][fileslice]:
#                    analysis(k, options.year, dataset_xs[dataset], dataset, file)
        if (len(futures) == 0): continue

        try:
            total = len(futures)
            processed = 0
            while len(futures) > 0:
                finished = set(job for job in futures if job.done())
                for job in finished:
                    dataset, sumws, nentries, hout = job.result()
                    nevents += nentries
                    sumw += sumws
                    for k in hout.keys():