edit_distance = sys.maxsize

        alexa_genre_length += len(alexa_genre_name)
        for dmoz_genre_name, dmoz_genre_info in dmoz_dict.items():
            if edit_distance is 0:
                break

            if alexa_genre_name == dmoz_genre_name:
                exact_match += 1
                edit_distance = 0

            else:
                edit_distance = min(edit_distance, levenshtein(dmoz_genre_name, alexa_genre_name))

        if edit_distance is not sys.maxsize:
            total_edit_distance += edit_distance
            edit_distance_count += 1

    return {
        "alexa_total": len(alexa_dict),
        "edit_distance_count": edit_distance_count,
        "total_edit_distance": total_edit_distance,
        "alexa_match": exact_match,
        "alexa_genre_length": alexa_genre_length,
    }

    # accumulate stats and update the collection


# Call MongoDB.connect with the host name and port read from settings.
# NOTE(review): this sits at column 0, so it presumably runs at import time —
# confirm that is intended rather than being guarded by a main check.
MongoDB.connect(settings.HOST_NAME, settings.PORT)
Example #2
0
    def start(cls):
        """Call ``MongoDB.connect`` with the configured host and port.

        The host name and port are read from ``settings.HOST_NAME`` and
        ``settings.PORT``.

        Returns:
            The ``cls`` argument itself, unchanged.
        """
        connect = MongoDB.connect
        connect(settings.HOST_NAME, settings.PORT)
        return cls