def process_dataset(dataset_name, args):
    try:
        embeddings_file = '{}/{}.npy'.format(args.embedding_save_path,
                                             dataset_name)
        if not args.force and os.path.exists(embeddings_file):
            return

        start_time = time()
        LOGGER.info('{:30} Starting'.format(dataset_name))

        X, Y = dataset_helper.get_dataset(dataset_name=dataset_name)
        X = preprocessing.preprocess_text_spacy(X, n_jobs=args.n_jobs_spacy)
        X = [[word.text.lower().strip() for word in doc] for doc in X]

        model = w2v_d2v.train_w2v(X,
                                  min_count=args.embedding_min_count,
                                  size=args.embedding_size,
                                  iter=args.embedding_iter,
                                  workers=args.n_jobs_w2v)

        word_vectors = model.wv
        del model

        with open(embeddings_file, 'wb') as f:
            pickle.dump(word_vectors, f)

        duration_in_s = time_utils.seconds_to_human_readable(time() -
                                                             start_time)
        LOGGER.info('{:30} Finished (time={})'.format(dataset_name,
                                                      duration_in_s))
    except Exception as e:
        LOGGER.exception(e)
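The embeddings file above is written with pickle despite its .npy suffix, so it is read back with pickle as well. A minimal sketch of loading and querying the saved vectors, assuming gensim's KeyedVectors interface for model.wv and a placeholder path:

import pickle

# Placeholder path following the '{embedding_save_path}/{dataset_name}.npy' template used above.
embeddings_file = 'data/embeddings/ng20.npy'

with open(embeddings_file, 'rb') as f:
    word_vectors = pickle.load(f)  # the pickled gensim KeyedVectors (model.wv)

# KeyedVectors supports direct lookup and similarity queries.
vector = word_vectors['computer']                           # embedding of a single token
neighbours = word_vectors.most_similar('computer', topn=5)  # closest tokens in the trained space
print(neighbours)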
Example #2
def save_training_meta(args):
    # Commented out, since rank is not saved to args; the save_training_meta safeguard is already in the training scripts.
    # if args.rank > 0:
    #    return

    # args is an EasyDict object, treat it the same as a normal dict
    os.makedirs(join(args.output_dir, 'log'), exist_ok=True)
    os.makedirs(join(args.output_dir, 'ckpt'), exist_ok=True)

    # training args
    save_args_path = join(args.output_dir, 'log', 'hps.json')
    save_json(vars(args), save_args_path, save_pretty=True)

    # model args
    model_config = load_json(args.model_config)
    save_model_config_path = join(args.output_dir, 'log', 'model_config.json')
    save_json(model_config, save_model_config_path, save_pretty=True)
    # git info
    try:
        LOGGER.info("Waiting on git info....")
        c = subprocess.run(["git", "rev-parse", "--abbrev-ref", "HEAD"],
                           timeout=10,
                           stdout=subprocess.PIPE)
        git_branch_name = c.stdout.decode().strip()
        LOGGER.info("Git branch: %s", git_branch_name)
        c = subprocess.run(["git", "rev-parse", "HEAD"],
                           timeout=10,
                           stdout=subprocess.PIPE)
        git_sha = c.stdout.decode().strip()
        LOGGER.info("Git SHA: %s", git_sha)
        git_dir = abspath(dirname(__file__))
        git_status = subprocess.check_output(['git', 'status', '--short'],
                                             cwd=git_dir,
                                             universal_newlines=True).strip()
        with open(join(args.output_dir, 'log', 'git_info.json'),
                  'w') as writer:
            json.dump(
                {
                    'branch': git_branch_name,
                    'is_dirty': bool(git_status),
                    'status': git_status,
                    'sha': git_sha
                },
                writer,
                indent=4)
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError) as e:
        LOGGER.exception(e)
        LOGGER.warn("Git info not found. Saving code into zip instead...")
        # save a copy of the codebase.
        # !!! Do not store heavy files in your codebase when using this.
        code_dir = dirname(dirname(realpath(__file__)))
        code_zip_filename = os.path.join(args.output_dir, "code.zip")
        LOGGER.info(f"Saving code from {code_dir} to {code_zip_filename}...")
        make_zipfile(code_dir,
                     code_zip_filename,
                     enclosing_dir="code",
                     exclude_dirs_substring="results",
                     exclude_dirs=["results", "debug_results", "__pycache__"],
                     exclude_extensions=[".pyc", ".ipynb", ".swap"])
        LOGGER.info("Saving code done.")
Example #3
    async def _handle_internal_error(self, ctx, ex):
        """ Handler for internal errors for logging into server text room. """
        ex_content = traceback.format_exception(type(ex), ex, ex.__traceback__)
        ex_content = " ".join(e for e in ex_content)

        LOGGER.exception(
            f"Unexpected exception on event: {ctx.message.content}. Args: {ctx.args} Kwargs: {ctx.kwargs}. Content: {ex_content}"
        )

        if not self.logging_channel:
            return

        await self.logging_channel.send(content="```" + ex_content + "```")
Example #4
    async def on_error(self, event, *args, **kwargs):
        """ Overall exception handler. """
        type_, value, tb = sys.exc_info()
        ex_content = traceback.format_exception(type_, value, tb)
        ex_content = " ".join(e for e in ex_content)
        LOGGER.exception(
            f"Unexpected exception on event: {event}. Args: {args} Kwargs: {kwargs}. Content: {ex_content}"
        )

        if not self.logging_channel:
            return

        await self.logging_channel.send(content="```" + ex_content + "```")
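Both handlers assume a discord.py client that exposes a logging_channel attribute. A minimal sketch of how such an on_error override could be wired up; the channel ID and token are placeholders, not values from the original code:

import logging
import sys
import traceback

import discord

LOGGER = logging.getLogger(__name__)
LOG_CHANNEL_ID = 123456789012345678  # placeholder channel ID


class LoggingClient(discord.Client):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.logging_channel = None

    async def on_ready(self):
        # Resolve the logging channel once the client cache is populated.
        self.logging_channel = self.get_channel(LOG_CHANNEL_ID)

    async def on_error(self, event, *args, **kwargs):
        ex_content = " ".join(traceback.format_exception(*sys.exc_info()))
        LOGGER.exception("Unexpected exception on event: %s", event)
        if self.logging_channel:
            await self.logging_channel.send(content="```" + ex_content + "```")


# client = LoggingClient(intents=discord.Intents.default())
# client.run('BOT_TOKEN')  # placeholder token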
Example #5
def save_training_meta(args):
    if args.rank > 0:
        return

    if not exists(args.output_dir):
        os.makedirs(join(args.output_dir, "log"))
        os.makedirs(join(args.output_dir, "ckpt"))

    with open(join(args.output_dir, "log", "hps.json"), "w") as writer:
        json.dump(vars(args), writer, indent=4)
    with open(args.model_config) as reader:
        model_config = json.load(reader)
    with open(join(args.output_dir, "log", "model.json"), "w") as writer:
        json.dump(model_config, writer, indent=4)
    # git info
    try:
        LOGGER.info("Waiting on git info....")
        c = subprocess.run(
            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
            timeout=10,
            stdout=subprocess.PIPE,
        )
        git_branch_name = c.stdout.decode().strip()
        LOGGER.info("Git branch: %s", git_branch_name)
        c = subprocess.run(
            ["git", "rev-parse", "HEAD"], timeout=10, stdout=subprocess.PIPE
        )
        git_sha = c.stdout.decode().strip()
        LOGGER.info("Git SHA: %s", git_sha)
        git_dir = abspath(dirname(__file__))
        git_status = subprocess.check_output(
            ["git", "status", "--short"], cwd=git_dir, universal_newlines=True
        ).strip()
        with open(join(args.output_dir, "log", "git_info.json"), "w") as writer:
            json.dump(
                {
                    "branch": git_branch_name,
                    "is_dirty": bool(git_status),
                    "status": git_status,
                    "sha": git_sha,
                },
                writer,
                indent=4,
            )
    except subprocess.TimeoutExpired as e:
        LOGGER.exception(e)
        LOGGER.warn("Git info not found. Moving right along...")
Example #6
    async def _handle_internal_error(self, ctx, ex):
        """ Handler for internal errors for logging into server text room. """
        ex_content = traceback.format_exception(type(ex), ex, ex.__traceback__)
        ex_content = " ".join(e for e in ex_content)

        # Check for the stupid restart of cogs.
        # TODO: fix this
        if "is already registered." in ex_content:
            return

        LOGGER.exception(
            f"Unexpected exception on event: {ctx.message.content}. Args: {ctx.args} Kwargs: {ctx.kwargs}. Content: {ex_content}"
        )

        if not self.logging_channel:
            return

        await self.logging_channel.send(content="```" + ex_content + "```")
Example #7
    async def on_error(self, event, *args, **kwargs):
        """ Overall exception handler. """
        type_, value, tb = sys.exc_info()
        ex_content = traceback.format_exception(type_, value, tb)
        ex_content = " ".join(e for e in ex_content)

        # Check for the stupid restart of cogs.
        # TODO: fix this
        if "is already registered." in ex_content:
            return

        LOGGER.exception(
            f"Unexpected exception on event: {event}. Args: {args} Kwargs: {kwargs}. Content: {ex_content}"
        )

        if not self.logging_channel:
            return

        await self.logging_channel.send(content="```" + ex_content + "```")
Example #8
def save_training_meta(args):
    if args.rank > 0:
        return

    if not exists(args.output_dir):
        os.makedirs(join(args.output_dir, 'log'))
        os.makedirs(join(args.output_dir, 'ckpt'))

    with open(join(args.output_dir, 'log', 'hps.json'), 'w') as writer:
        json.dump(vars(args), writer, indent=4)
    with open(args.model_config) as reader:
        model_config = json.load(reader)
    with open(join(args.output_dir, 'log', 'model.json'), 'w') as writer:
        json.dump(model_config, writer, indent=4)
        
    return # no need to store git info

    # git info
    try:
        LOGGER.info("Waiting on git info....")
        c = subprocess.run(["git", "rev-parse", "--abbrev-ref", "HEAD"],
                           timeout=10, stdout=subprocess.PIPE)
        git_branch_name = c.stdout.decode().strip()
        LOGGER.info("Git branch: %s", git_branch_name)
        c = subprocess.run(["git", "rev-parse", "HEAD"],
                           timeout=10, stdout=subprocess.PIPE)
        git_sha = c.stdout.decode().strip()
        LOGGER.info("Git SHA: %s", git_sha)
        git_dir = abspath(dirname(__file__))
        git_status = subprocess.check_output(
            ['git', 'status', '--short'],
            cwd=git_dir, universal_newlines=True).strip()
        with open(join(args.output_dir, 'log', 'git_info.json'),
                  'w') as writer:
            json.dump({'branch': git_branch_name,
                       'is_dirty': bool(git_status),
                       'status': git_status,
                       'sha': git_sha},
                      writer, indent=4)
    except subprocess.TimeoutExpired as e:
        LOGGER.exception(e)
        LOGGER.warn("Git info not found. Moving right along...")
Example #9
def process_dataset(dataset_name, args):
    LOGGER.info('{:15} - Start'.format(dataset_name))
    LOGGER.info('{:15} - Retrieving trained embedding'.format(dataset_name))

    pre_trained_embedding = embeddings.get_embedding_model(
        args.pre_trained_embedding,
        binary=False,
        first_line_header=True,
        with_gensim=True)

    try:
        trained_embedding = dataset_helper.get_w2v_embedding_for_dataset(
            dataset_name)
    except FileNotFoundError as e:
        LOGGER.exception(e)
        return

    cmap_cache_files = dataset_helper.get_all_cached_graph_datasets(
        dataset_name=dataset_name, graph_type=constants.TYPE_CONCEPT_MAP)

    coo_cache_files = [
        x for x in dataset_helper.get_all_cached_graph_datasets(
            dataset_name=dataset_name, graph_type=constants.TYPE_COOCCURRENCE)
        if 'all' in x
    ]

    if not cmap_cache_files or not coo_cache_files:
        return

    used_graphs = [cmap_cache_files[0], coo_cache_files[0]]

    LOGGER.info('{:15} - Retrieving dataset'.format(dataset_name))
    all_labels = set()
    for graph_cache_file in used_graphs:
        X, _ = dataset_helper.get_dataset_cached(graph_cache_file)
        X = graph_helper.get_graphs_only(X)
        all_labels |= graph_helper.get_all_node_labels_uniq(
            X, as_sorted_list=False)

    LOGGER.info('{:15} - Resolving embeddings'.format(dataset_name))
    embeddings_pre_trained, not_found_pre_trained_coreferenced, not_found_trained, not_found_pre_trained, lookup, similar_els = embeddings.get_embeddings_for_labels_with_lookup(
        all_labels, trained_embedding, pre_trained_embedding)

    LOGGER.info('{:15} - Missing'.format(dataset_name))

    for label, s in [('trained', not_found_trained),
                     ('pre_trained', not_found_pre_trained),
                     ('after_coreference', not_found_pre_trained_coreferenced)
                     ]:
        LOGGER.info('\t{:20} {:>6}'.format(label, len(s)))

    embedding_file = '{}/{}.w2v.txt'.format(args.embeddings_result_folder,
                                            dataset_name)
    embeddings.save_embedding_dict(embeddings_pre_trained, embedding_file)
    embeddings_pre_trained = embeddings.load_word2vec_format(
        fname=embedding_file, binary=False)

    LOGGER.info('{:15} - Co-reference resolution'.format(dataset_name))
    max_topn = max(args.topn)

    similar_labels = coreference.get_most_similar_labels(
        all_labels, embeddings_pre_trained, max_topn)

    for topn in args.topn:
        for threshold in args.merge_threshold:
            LOGGER.info(
                '{:15} - Co-reference resolution: topn: {}, threshold: {}'.
                format(dataset_name, topn, threshold))
            clique_lookup = coreference.create_label_cliques_by_similarity(
                similar_labels, threshold=threshold, topn=topn)

            new_lookup = embeddings.merge_lookups(clique_lookup, lookup)

            with open(
                    '{}/{}.threshold-{}.topn-{}.label-lookup.npy'.format(
                        args.embeddings_result_folder, dataset_name, threshold,
                        topn), 'wb') as f:
                pickle.dump(new_lookup, f)
    LOGGER.info('{:15} - Finished'.format(dataset_name))
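The label lookups written at the end are plain pickles. A minimal sketch of reading one back; the folder, dataset, threshold and topn values below are placeholders following the filename template above:

import pickle

lookup_file = 'data/embeddings/ng20.threshold-0.8.topn-10.label-lookup.npy'

with open(lookup_file, 'rb') as f:
    label_lookup = pickle.load(f)  # presumably a dict mapping node labels to their merged labels

print(len(label_lookup))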
Example #10
def run_classification_task(task: ExperimentTask, cfo: ClassificationOptions,
                            experiment_config: dict):
    helper.set_random_seed()

    args = cfo
    result_filename_tmpl = filename_utils.get_result_filename_for_task(
        task, experiment_config=experiment_config, cfo=cfo)

    result_file = '{}/{}'.format(cfo.results_folder, result_filename_tmpl)
    predictions_file = '{}/{}'.format(cfo.predictions_folder,
                                      result_filename_tmpl)
    classifier_file = '{}/{}'.format(cfo.classifier_folder,
                                     result_filename_tmpl)

    if not cfo.force and os.path.exists(result_file):
        return

    time_checkpoints = {}

    def add_time_checkpoint(name):
        time_checkpoints[name] = time()

    add_time_checkpoint('start')
    X, Y, estimator, param_grid = task.fn()
    add_time_checkpoint('retrieved_data')

    # A good heuristic of whether it's a gram matrix is whether the dimensions are the same
    is_precomputed = isinstance(X, np.ndarray) and X.shape[0] == X.shape[1]

    # This is also a heuristic
    is_dummy = 'classifier__strategy' in param_grid

    # Add classifiers, instantiate transformer classes and merge with experiment config
    param_grid = experiment_helper.prepare_param_grid(task, param_grid,
                                                      experiment_config)

    LOGGER.info('ParamGrid: {}\n\n'.format(
        pipeline_helper.remove_complex_types(param_grid)))

    X_train, Y_train, X_test, Y_test = X, Y, [], []
    train_i, test_i = range(len(X)), []

    if not is_dummy:  # and cfo.create_predictions:
        # Hold out validation set for predictions
        try:
            X_train, X_test, Y_train, Y_test, train_i, test_i = train_test_split(
                X,
                Y,
                test_size=cfo.prediction_test_size,
                is_precomputed=is_precomputed,
            )
        except Exception as e:
            LOGGER.warning('Could not split dataset for predictions')
            LOGGER.exception(e)

    def get_cv(splits):
        if splits == -1:
            _, _, _, _, X_train_i, X_test_i = train_test_split(
                X_train,
                Y_train,
                test_size=0.33,
                is_precomputed=is_precomputed)
            cv = [(X_train_i, X_test_i)]
        else:
            cv = sklearn.model_selection.StratifiedKFold(
                n_splits=splits,
                shuffle=True,
                random_state=constants.RANDOM_SEED)
        return cv

    add_time_checkpoint('split_data')
    cv = get_cv(cfo.n_splits)

    should_refit = np.all([
        #not cfo.use_nested_cross_validation,
        not is_dummy,
        #cfo.create_predictions or cfo.save_best_clf
    ])

    gscv = GridSearchCV(estimator=estimator,
                        param_grid=param_grid,
                        cv=cv,
                        scoring=cfo.scoring,
                        n_jobs=cfo.n_jobs,
                        verbose=cfo.verbose,
                        refit=cfo.refit if should_refit else False)

    if cfo.use_nested_cross_validation and not is_dummy:
        cv_nested = get_cv(cfo.n_splits_nested)

        LOGGER.info('Using nested cross-validation')

        scores = sklearn.model_selection.cross_validate(
            gscv,
            X,
            Y,
            scoring=cfo.scoring,
            cv=cv_nested,
            n_jobs=cfo.n_jobs_outer,
            verbose=cfo.verbose,
            return_train_score=True)
        result = dict(scores, **param_grid)
        add_time_checkpoint('fitted_nested')
        results_helper.save_results(result,
                                    result_file,
                                    args,
                                    time_checkpoints=time_checkpoints)
        return

    gscv_result = gscv.fit(X_train, Y_train)
    add_time_checkpoint('fitted_gridsearch')

    if not is_dummy and cfo.create_predictions:
        if not len(X_test):
            LOGGER.warning('Validation set for prediction has no items')
        else:
            try:
                # Retrain the best classifier and get prediction on validation set
                Y_test_pred = gscv_result.best_estimator_.predict(X_test)
                add_time_checkpoint('predicted')
                results_helper.save_results(
                    {
                        'gscv_result':
                        remove_coefs_from_results(gscv_result.cv_results_),
                        'all_params':
                        remove_coefs_from_results(param_grid),
                        'best_params':
                        remove_coefs_from_results(gscv_result.best_params_),
                        'Y_real':
                        Y_test,
                        'Y_pred':
                        Y_test_pred,
                        'X_test':
                        X_test,
                    },
                    predictions_file,
                    args,
                    time_checkpoints=time_checkpoints)
            except Exception as e:
                LOGGER.warning('Error while trying to retrain best classifier')
                LOGGER.exception(e)

    if cfo.save_best_clf:
        best_estimator = gscv_result.best_estimator_
        try:
            results_helper.save_results(
                {
                    'params': gscv_result.best_params_,
                    'classifier': best_estimator
                },
                classifier_file,
                args,
                time_checkpoints=time_checkpoints)
        except Exception as e:
            LOGGER.warning('Error while saving best estimator: {}'.format(e))
            LOGGER.exception(e)

    add_time_checkpoint('finished')
    results_helper.save_results(gscv_result.cv_results_,
                                result_file,
                                args,
                                time_checkpoints=time_checkpoints)
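The is_precomputed heuristic matters because a precomputed gram matrix must be sliced on both axes when splitting; the project's train_test_split wrapper that does this is not shown. A plain scikit-learn sketch of the same idea:

import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.rand(100, 20)
Y = rng.randint(0, 2, size=100)

K = X @ X.T  # precomputed (linear) gram matrix: square, n_samples x n_samples

indices = np.arange(len(Y))
train_i, test_i = train_test_split(indices, test_size=0.2, stratify=Y, random_state=0)

# Training block: rows AND columns restricted to training samples.
K_train = K[np.ix_(train_i, train_i)]
# Test block: kernel values between test samples (rows) and training samples (columns).
K_test = K[np.ix_(test_i, train_i)]

gscv = GridSearchCV(SVC(kernel='precomputed'), param_grid={'C': [0.1, 1, 10]}, cv=3)
gscv.fit(K_train, Y[train_i])
print(gscv.best_params_, gscv.score(K_test, Y[test_i]))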
Example #11
def process_graph_cache_file(graph_cache_file, args):
    graph_cache_filename = graph_cache_file.split('/')[-1].rsplit('.')[0]
    dataset = filename_utils.get_dataset_from_filename(graph_cache_file)

    if '.phi.' in graph_cache_filename or not filter_utils.file_should_be_processed(
            graph_cache_filename, args.include_filter, args.exclude_filter,
            args.limit_dataset):
        return

    LOGGER.info('{:15} starting ({})'.format(dataset, graph_cache_filename))

    fast_wl_trans = FastWLGraphKernelTransformer(
        h=args.wl_h, use_early_stopping=False, truncate_to_highest_label=False)

    try:
        phi_graph_cache_file = graph_cache_file.replace('.npy', '.phi.npy')
        X_graphs, Y = dataset_helper.get_dataset_cached(graph_cache_file)
        X_graphs = graph_helper.get_graphs_only(X_graphs)

        # Kernel: WL
        if args.use_wl:
            used_phi_graph_cache_file = phi_graph_cache_file
            splitted_phi_graph_cache_file = phi_graph_cache_file.replace(
                '.phi', '.splitted.phi')
            phi_same_label_graph_cache_file = phi_graph_cache_file.replace(
                dataset, '{}_same-label'.format(dataset)).replace(
                    '.phi', '.splitted.phi')

            # Stop here if all files have already been created
            if not args.force and np.all([
                    os.path.exists(x) for x in
                [
                    splitted_phi_graph_cache_file, used_phi_graph_cache_file,
                    phi_same_label_graph_cache_file
                ]
            ]):
                return

            X_, Y_ = np.array(np.copy(X_graphs)), np.array(np.copy(Y))
            if args.wl_sort_classes:
                X_, Y_ = sort(X_, Y_, by=Y_)

            num_vertices = len(graph_helper.get_all_node_labels(X_))
            fast_wl_trans.set_params(phi_dim=num_vertices)

            X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
                np.copy(X_),
                np.copy(Y_),
                stratify=Y_,
                test_size=args.wl_test_size)

            X_train, Y_train = sort(X_train, Y_train, by=Y_train)
            X_test, Y_test = sort(X_test, Y_test, by=Y_test)

            # Splitted version
            if args.force or not os.path.exists(splitted_phi_graph_cache_file):
                t = sklearn.base.clone(fast_wl_trans).set_params(
                    same_label=True)
                phi_train = t.fit_transform(np.copy(X_train))
                phi_test = t.transform(np.copy(X_test))

                with open(splitted_phi_graph_cache_file, 'wb') as f:
                    pickle.dump((phi_train, phi_test, X_train, X_test, Y_train,
                                 Y_test), f)

            # Splitted, same label
            if args.force or not os.path.exists(
                    phi_same_label_graph_cache_file):
                t = sklearn.base.clone(fast_wl_trans)
                phi_train = t.fit_transform(X_train)
                phi_test = t.transform(X_test)

                with open(phi_same_label_graph_cache_file, 'wb') as f:
                    pickle.dump((phi_train, phi_test, X_train, X_test, Y_train,
                                 Y_test), f)

            # Whole dataset
            if args.force or not os.path.exists(used_phi_graph_cache_file):
                t = sklearn.base.clone(fast_wl_trans)
                with open(used_phi_graph_cache_file, 'wb') as f:
                    pickle.dump((t.fit_transform(X_), Y_), f)

        # Kernel: spgk
        if args.use_spgk:
            for depth in args.spgk_depth:
                spgk_graph_cache_file = graph_cache_file.replace(
                    '.npy', '.spgk-{}.gram.npy'.format(depth))

                if args.force or not os.path.exists(spgk_graph_cache_file):
                    K = spgk.transform(X_graphs, depth=depth)

                    with open(spgk_graph_cache_file, 'wb') as f:
                        pickle.dump((K, Y), f)
    except Exception as e:
        LOGGER.exception(e)

    LOGGER.info('{:15} finished ({})'.format(dataset, graph_cache_filename))
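sort(X, Y, by=...) is a project helper that is not defined here. Judging from its usage above, a minimal sketch (an assumption) could look like:

import numpy as np


def sort(X, Y, by):
    """Return X and Y reordered by the values in `by` (sketch of the helper used above)."""
    order = np.argsort(by, kind='stable')
    return np.asarray(X, dtype=object)[order], np.asarray(Y)[order]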
Example #12
def start_tasks(args, all_tasks: typing.List[ExperimentTask],
                classification_options: ClassificationOptions,
                experiment_config: dict):
    filtered_task_types = (experiment_config['params_per_type'].keys()
                           if experiment_config else None)

    if experiment_config.get('limit_dataset', None) is not None:
        limit_dataset = experiment_config['limit_dataset']
    else:
        limit_dataset = args.limit_dataset

    limit_graph_type = experiment_config.get('limit_graph_type', None)

    def should_process_task(task: ExperimentTask):
        # Dataset filter
        is_filtered_by_dataset = limit_dataset and filename_utils.get_dataset_from_filename(
            task.name) not in limit_dataset

        # Task type filters
        is_filtered_by_include_filter = (args.task_type_include_filter
                                         and task.type
                                         not in args.task_type_include_filter)
        is_filtered_by_exclude_filter = (args.task_type_exclude_filter
                                         and task.type
                                         in args.task_type_exclude_filter)

        is_filtered_by_name_filter = (args.task_name_filter and
                                      args.task_name_filter not in task.name)
        is_filtered_by_param_options = (filtered_task_types and task.type
                                        not in filtered_task_types)
        is_filtered_by_graph_type = (
            limit_graph_type and graph_helper.get_graph_type_from_filename(
                task.name) not in [None] + limit_graph_type)

        # Do not process tasks that have already been calculated (unless args.force == True)
        created_files = [
            '{}/{}'.format(
                args.results_folder,
                filename_utils.get_result_filename_for_task(
                    task, experiment_config, cfo=classification_options))
        ]
        is_filtered_by_file_exists = (not args.force and np.any(
            [os.path.exists(file) for file in created_files]))

        should_process = not np.any([
            is_filtered_by_graph_type, is_filtered_by_dataset,
            is_filtered_by_include_filter, is_filtered_by_name_filter,
            is_filtered_by_file_exists, is_filtered_by_exclude_filter,
            is_filtered_by_param_options
        ])

        return should_process

    def print_tasks(tasks: typing.List[ExperimentTask]):
        for task in tasks:
            print('\t{t.type:26} {dataset:18} {t.name}'.format(
                t=task,
                dataset=filename_utils.get_dataset_from_filename(task.name)))
        print('\n')

    # Filter out tasks
    tasks = sorted(
        [task for task in all_tasks if should_process_task(task)],
        key=lambda x: filename_utils.get_dataset_from_filename(x.name))

    if args.dry_run:
        print('All tasks:')
        print_tasks(all_tasks)

    print('Filtered tasks:')
    print_tasks(tasks)

    print('# tasks per type (filtered/unfiltered)')
    task_type_counter_unfiltered = collections.Counter(
        [t.type for t in all_tasks])
    task_type_counter_filtered = collections.Counter([t.type for t in tasks])
    for task_type, unfiltered_count in task_type_counter_unfiltered.items():
        print('\t{:25} {:2}/{:2}'.format(
            task_type, task_type_counter_filtered.get(task_type, 0),
            unfiltered_count))
    print('\n')

    if args.dry_run:
        print('Only doing a dry-run. Exiting.')
        return

    num_tasks = len(tasks)
    for task_idx, t in enumerate(tasks):

        def print_task(msg=''):
            LOGGER.info(
                'Task {idx:>2}/{num_tasks}: {t.type:30} - {t.name:40} - {msg}'.
                format(idx=task_idx + 1, num_tasks=num_tasks, t=t, msg=msg))

        start_time = time()
        print_task('Started')
        try:
            task_runner.run_classification_task(t, classification_options,
                                                experiment_config)
            gc.collect()
        except Exception as e:
            print_task('Error: {}'.format(e))
            LOGGER.exception(e)
        elapsed_seconds = time() - start_time
        print_task('Finished (time={})'.format(
            time_utils.seconds_to_human_readable(elapsed_seconds)))
        gc.collect()

    LOGGER.info('Finished!')
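ExperimentTask is only used here through its .type, .name and .fn attributes. A minimal sketch of a compatible container; the real class may carry additional fields:

import typing


class ExperimentTask(typing.NamedTuple):
    """Minimal stand-in matching the attributes accessed above (sketch only)."""
    type: str                # e.g. a kernel/feature type identifier
    name: str                # usually encodes the dataset and graph type
    fn: typing.Callable      # returns (X, Y, estimator, param_grid) when called


# tasks = [ExperimentTask('graph_fast_wl', 'dataset_graph_cmap_ng20.npy', load_fn)]  # hypothetical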