def reprocess_errors(db_root, sources):
    """Reprocess every stored batch that has recorded errors.

    For each source, all days reported by its ``Provider`` are scanned. Every
    batch with a non-empty error list is passed to ``reprocess_batch_errors``;
    the articles, deleted articles and raw data it returns are saved into the
    batch directory, and the errors it returns are written through
    ``update_errors_file``.

    :param db_root: root directory of the database. Batch directories are
        addressed as ``<db_root>/<SOURCE_NAME>/<date_string>/<batch time>``.
    :param sources: iterable of objects exposing a ``SOURCE_NAME`` attribute;
        each one is also handed to ``reprocess_batch_errors``.
    :returns: None. Progress is reported on stdout, one line per batch that
        has errors.
    """
    for source in sources:
        source_name = source.SOURCE_NAME
        provider_db = Provider(db_root, source_name)

        for date_string in provider_db.get_all_days():
            for batch_time, errors in provider_db.get_errors2_per_batch(date_string):
                if not errors:
                    continue

                # Single-argument print: emits the same line under Python 2
                # and stays valid syntax under Python 3.
                print("%s %s %s found %d errors"
                      % (source_name, date_string, batch_time, len(errors)))

                batch_directory = os.path.join(db_root, source_name, date_string, batch_time)

                # Keep the errors returned by the reprocessing step under their
                # own name instead of rebinding the loop variable.
                articles, deleted_articles, new_errors, raw_data = reprocess_batch_errors(
                    source, date_string, batch_time, errors)

                save_reprocessed_data(batch_directory, articles, deleted_articles, raw_data)
                update_errors_file(batch_directory, new_errors)
# Example #2
# 0
def list_errors(db_root, outfile, source_list):
    """Print all stored errors per source and dump them to a JSON file.

    For every selected source, each batch of each day is read from its
    ``Provider``. The per-batch errors are flattened, passed through
    ``filter_identical_ErrorLogEntries``, printed one per line, and collected.
    A per-source summary is printed at the end and the collected errors are
    written to ``outfile``.

    :param db_root: root directory of the database, passed to ``Provider``
        and ``get_all_provider_names``.
    :param outfile: path of the JSON file to write. It maps each source name
        to a list of ``["<date_string>/<batch_time>", [url, title,
        stacktrace]]`` entries.
    :param source_list: comma-separated source names. When empty or ``None``,
        every name returned by ``get_all_provider_names(db_root)`` is used.
    :returns: None.
    """
    if source_list:
        source_names = source_list.split(",")
    else:
        source_names = get_all_provider_names(db_root)

    error_counts = dict()
    all_errors = dict()

    for source_name in source_names:
        provider_db = Provider(db_root, source_name)
        source_errors = []
        error_count = 0

        for date_string in provider_db.get_all_days():
            for batch_time, errors in provider_db.get_errors2_per_batch(date_string):
                # Each batch yields a collection of error collections:
                # flatten it before filtering.
                # NOTE(review): len() below relies on the filter returning a
                # sized collection (e.g. a list) -- confirm.
                errors = filter_identical_ErrorLogEntries(it.chain(*errors))
                error_count += len(errors)

                batch_label = u"{0}/{1}".format(date_string, batch_time)
                for e in errors:
                    print(u"+++ [{0}] {1}   ({2})".format(batch_label, e.title, e.url))
                    source_errors.append((batch_label, (e.url, e.title, e.stacktrace)))

        all_errors[source_name] = source_errors
        error_counts[source_name] = error_count

    print("\n" * 4)
    for name, error_count in error_counts.items():
        # First line: running count. Second line: number of collected
        # entries. One entry is appended per counted error, so both agree;
        # both are kept so the output stays unchanged.
        print("{0}: Had {1} errors".format(name, error_count))
        print("{0}: Had {1} errors".format(name, len(all_errors[name])))

    with open(outfile, 'w') as f:
        json.dump(all_errors, f, indent=2)