Esempio n. 1
0
    def test_levenstein_check_no_match(self):
        """Check the result of levenstein_check for a non-matching domain.

        A single random keyword is generated, and the requested domain is
        built from a second, independently generated random word followed
        by ``x.com``.  The call is expected to return a tuple whose first
        element is falsy and whose remaining three elements are ``None``.
        """
        # NOTE(review): both words are random, so the expectations below
        # presumably rely on them not being near-identical - confirm.
        faker = Faker()
        keywords = [faker.domain_word()]
        info("Generated keyword: {}".format(keywords))

        domain = "{}x.com".format(faker.domain_word())
        info("Requested domain - {}".format(domain))

        result = levenstein_check(keywords, domain.split('.'))
        assert_type(result, tuple, "Check if proper tuple is returned")
        assert_false(result[0], "Check if keyword matches domain")

        # Positions 1..3 of the result must all be None when nothing matches.
        none_expectations = (
            (1, "Check if no keyword matches domain"),
            (2, "Check if no keyword is returned"),
            (3, "Check if no levenstein distance is returned"),
        )
        for position, message in none_expectations:
            assert_none(result[position], message)
Esempio n. 2
0
    def test_levenstein_check(self):
        """Check the result of levenstein_check for a look-alike domain.

        Two random keywords are generated and the first one is used to build
        a domain with three keyword-based labels: the keyword plus ``x``,
        the keyword itself, and the keyword plus a long suffix.  The call is
        expected to return a tuple holding a truthy flag, the value 1, the
        first keyword and the value 1 again (asserted as the number of
        matches and the levenstein distance respectively).
        """
        faker = Faker()
        keywords = [faker.domain_word() for _ in range(2)]
        info("Generated keyword: {}".format(keywords))

        expected_keyword = keywords[0]
        domain_template = "{}x.{}.{}awdawdawdfawytdawdrawd.com"
        domain = domain_template.format(*([expected_keyword] * 3))
        info("Requested domain - {}".format(domain))

        result = levenstein_check(keywords, domain.split('.'))
        assert_type(result, tuple, "Check if proper tuple is returned")
        assert_true(result[0], "Check if keyword matches domain")

        # (position in the result, expected value, assertion message)
        equal_expectations = (
            (1, 1, "Check if keyword matches domain only one time"),
            (2, expected_keyword, "Check if proper keyword is returned"),
            (3, 1, "Check if proper levenstein distance is returned"),
        )
        for position, expected, message in equal_expectations:
            assert_equal(result[position], expected, message)
Esempio n. 3
0
def get_levenstein_details(url_body):
    """Return the levenstein match details for the URL in ``url_body``.

    ``url_body`` is validated against ``details_url_schema`` first.  The
    domain obtained from its ``'url'`` entry is split on ``'.'`` and checked
    against every ``good_keyword`` from ``Goodies.get_all_goodies()``.

    :param url_body: mapping with a ``'url'`` entry (read with ``.get``).
    :return: the result of ``_no_data_response()`` when the check yields no
        keyword; otherwise a 200 ``Response`` whose JSON body carries the
        matched keyword and its distance under ``"details"``.
    :raises BadRequest: when schema validation fails; it carries the
        validation error's message.
    """
    try:
        jsonschema.validate(url_body, details_url_schema)
    except jsonschema.exceptions.ValidationError as validation_error:
        raise BadRequest(validation_error.message)

    domain = url_to_domain(url_body.get('url'))
    known_keywords = [
        goodie['good_keyword'] for goodie in Goodies.get_all_goodies()
    ]
    # Only the last two fields of the check result are needed here.
    _, _, matched_keyword, distance = levenstein.levenstein_check(
        known_keywords, domain.split('.'))

    if matched_keyword:
        details = {
            "matched_keyword": matched_keyword,
            "levenstein_distance": distance
        }
        payload = json.dumps({"details": details},
                             default=_default_json_model)
        return Response(payload, 200, mimetype="application/json")

    return _no_data_response()
def verify_levenstein(domain):
    """Return the verdict of the levenstein check for ``domain``.

    The good keywords (e.g. ``['facebook', 'google', 'onet']``) and the
    domain split on ``'.'`` (e.g. ``['weka', 'pwr', 'edu', 'pl']``) are
    handed to ``levenstein_check``; the first element of its result is
    returned.

    The check itself lives in ``levenstein_check`` and is described as
    follows (not verifiable from this function alone):

    * a per-keyword counter of matches is kept, starting at 0;
    * a keyword identical to the domain phrase is not compared;
    * a match is counted when the keyword is shorter than twice the
      length of the domain phrase, the phrase is longer than 2 characters
      (to exclude e.g. 'wp' or 'pl'), and the levenstein distance is
      greater than 0 and less than 3;
    * if the number of matches for a phrase from the domain is less than
      half of the number of phrases, the domain is malicious; otherwise
      it is not.

    :param domain: dotted domain name.
    :return: the verdict produced by ``levenstein_check``.
    """
    keywords = []
    for goodie in Goodies.get_all_goodies():
        keywords.append(goodie['good_keyword'])

    check_result = levenstein_check(keywords, domain.split('.'))
    verdict, _, _, _ = check_result
    return verdict
Esempio n. 5
0
def create_baddie(domain):
    """Gather the attributes of ``domain`` and pass them to ``add_baddie``.

    The function calls ``add_ip`` and ``add_cert`` for the domain, runs the
    levenstein check of the domain phrases against the good keywords, looks
    for a contained keyword with ``match_keyword`` and computes the entropy
    with ``ent.get_entropy``.

    :param domain: dotted domain name; it is split on ``'.'`` into phrases.
    :return: whatever ``add_baddie`` returns.  The levenstein distance
        passed to it is the smallest distance, greater than 0 and less
        than 3, between the matched keyword and any domain phrase, or 0
        when there is no matched keyword or no phrase that close.  Missing
        keywords are passed as ``''``.
    """
    _, ip_id = add_ip(domain)
    _, crt_id = add_cert(domain)
    good_keywords = [k['good_keyword'] for k in Goodies.get_all_goodies()]
    domain_phrases = domain.split('.')

    # Take the keyword by position instead of unpacking a fixed number of
    # names: unpacking three names raises ValueError as soon as the check
    # result carries a fourth element (e.g. the distance).
    check_result = lev.levenstein_check(good_keywords, domain_phrases)
    lev_matched_keyword = check_result[2]

    lev_distance = 0
    if lev_matched_keyword:
        # Previously the running minimum started at 0 and was guarded by
        # `3 > minimum > distance`, which can never hold for a non-negative
        # distance, so the distance to the *last* phrase (usually the TLD)
        # was stored.  Keep the closest non-identical phrase instead.
        # NOTE(review): the lower bound 0 (identical phrases are skipped)
        # is inferred, not shown by this function - confirm against
        # levenstein_check.
        close_distances = []
        for phrase in domain_phrases:
            distance = lev.calculate_levenstein(lev_matched_keyword, phrase)
            if 0 < distance < 3:
                close_distances.append(distance)
        if close_distances:
            lev_distance = min(close_distances)
    else:
        lev_matched_keyword = ''

    _, contained_matched_keyword = match_keyword(domain)
    if not contained_matched_keyword:
        contained_matched_keyword = ''

    entropy = ent.get_entropy(domain)
    return add_baddie(domain, ip_id[1], crt_id[1], lev_distance,
                      lev_matched_keyword, contained_matched_keyword, entropy)