# ----------------------------------------------------------------
# Configure logging: full records go to run.log, and a console
# handler mirrors a shorter format to stderr
#
logging.basicConfig(
    format='%(levelname)7s - %(name)s - %(asctime)s: %(message)s',
    filename='run.log',
    level=log_level)
console = logging.StreamHandler()
console.setFormatter(
    logging.Formatter('%(levelname)7s - %(name)-8s: %(message)s'))
logging.getLogger('').addHandler(console)
log = logging.getLogger('main')

# ----------------------------------------------------------------
# Load various components, and configure the modules that control
# the crawling process
#
corpus_table = CorpusTable.CorpusTable(args.dbdir)  # Storage layer
spider = HTTPClient.HTTPClient()  # Retrieval code
url_normaliser = Normalisation.URLNormaliser()  # URL normaliser
feature_extractor = Features.Features(url_normaliser,
                                      ['title', 'h1'])  # Feature extractor
# URL Fitness Function
#url_rank_function   = SimplicityURLRank.SimplicityURLRank()                         # Prefer simple URLs
#url_rank_function   = SampleURLRank.SampleURLRank()                                 # Sample code
url_rank_function = HumanReadableURLRank.HumanReadableURLRank()  # Prefer human-readable URLs
page_filters = [  # Filters for page rejection
    # FuzzyDuplicateFilter.FuzzyDuplicateFilter(corpus_table),   # Fuzzy hash using ssdeep
    DuplicateFilter.DuplicateFilter(corpus_table),  # Perfect duplicate checker
    MinimumLengthFilter.MinimumLengthFilter(100),  # Min length
    MaximumLengthFilter.MaximumLengthFilter(800000),  # Max length
    URLCountFilter.URLCountFilter(0, 1000),  # URL count
    MetadataRegexpFilter.MetadataRegexpFilter(