Example #1
0
    def from_crawler(cls, crawler):
        """Build an instance from the Mongo settings carried by ``crawler``.

        Reads ``MONGO_URI``, ``MONGO_DATABASE`` and ``MONGO_COLLECTION_NAME``
        from ``crawler.settings``, passes each through
        ``preconditions.check_not_none_or_whitespace`` and forwards the values
        that check returns to the constructor as keyword arguments.

        :param crawler: object exposing a ``settings`` attribute with ``get``.
        :return: whatever ``cls(...)`` returns for the validated settings.

        NOTE(review): the check is called with ``exception=BoSettingsError``,
        so presumably that is what is raised for a missing or blank setting --
        confirm against ``preconditions``.
        """
        settings = crawler.settings

        def required(key):
            # Look the setting up and validate it in a single step; the
            # setting name doubles as the label handed to the check.
            return preconditions.check_not_none_or_whitespace(
                settings.get(key), key, exception=BoSettingsError)

        # Keyword arguments are evaluated left to right, so the settings are
        # read and validated in URI -> database -> collection order.
        return cls(
            mongo_uri=required(MONGO_URI),
            mongo_db=required(MONGO_DATABASE),
            collection_name=required(MONGO_COLLECTION_NAME),
        )
Example #2
0
    def from_crawler(cls, crawler):
        """Build an instance from the tagging settings carried by ``crawler``.

        Reads ``ALCHEMY_API_KEY``, ``TAGS_FILE``, ``TAG_MATCH_THRESHOLD``,
        ``RELEVANCE_THRESHOLD`` and ``CASE_INSENSITIVE_TAGS`` from
        ``crawler.settings``. Every setting is validated on its raw value
        first; only then are the threshold and flag settings converted with
        ``getint`` / ``getfloat`` / ``getbool``.

        :param crawler: object exposing a ``settings`` attribute.
        :return: whatever ``cls(...)`` returns for the validated settings.

        NOTE(review): the checks are called with ``exception=BoSettingsError``,
        so presumably that is what is raised for a missing setting -- confirm
        against ``preconditions``.
        """
        settings = crawler.settings

        api_key = settings.get(ALCHEMY_API_KEY)
        tags_file = settings.get(TAGS_FILE)

        preconditions.check_not_none_or_whitespace(api_key, ALCHEMY_API_KEY, exception=BoSettingsError)
        preconditions.check_not_none_or_whitespace(tags_file, TAGS_FILE, exception=BoSettingsError)

        # Scrapy's typed getters coerce whatever get() returns, default
        # included, so getint(name, default=None) on a missing setting fails
        # with TypeError from int(None) before any None check can run.
        # Validate the raw value first so a missing setting is reported
        # through the same precondition path as the string settings.
        for name in (TAG_MATCH_THRESHOLD, RELEVANCE_THRESHOLD, CASE_INSENSITIVE_TAGS):
            preconditions.check_not_none(settings.get(name), name, exception=BoSettingsError)

        tag_match_threshold = settings.getint(TAG_MATCH_THRESHOLD)
        relevance_threshold = settings.getfloat(RELEVANCE_THRESHOLD)
        case_insensitive_tags = settings.getbool(CASE_INSENSITIVE_TAGS)

        # Positional order is kept exactly as the original call had it.
        # NOTE(review): case_insensitive_tags precedes relevance_threshold --
        # confirm this matches the constructor signature.
        return cls(api_key, tags_file, tag_match_threshold, case_insensitive_tags, relevance_threshold)