Example no. 1
    def min_entropy(inList):
        """Returns the minimum shannon entropy of URIs in the list"""

        minEntropy = en.shannon_entropy(inList[0])

        for uri in inList:
            if minEntropy > en.shannon_entropy(uri):
                minEntropy = en.shannon_entropy(uri)

        return(minEntropy)
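
A minimal usage sketch for the helper above. It assumes the entropy package is imported as en (as in the snippet) and that min_entropy is reachable at module level; note that an empty list would raise IndexError, since the function indexes inList[0] before looping.

import entropy as en

uris = ["https://example.com/login", "https://k3x9qz7f.example.net/a"]
print(min_entropy(uris))  # smallest Shannon-entropy value among the URIs
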
Example no. 2
 def __iter__(self):
     for word in self.context.data.split():
         if self.config['word_length_min'] <= len(
                 word) <= self.config['word_length_max']:
             logger.debug(
                 "found word ({}) that matched length constraints of min:{} and max:{}".\
                     format(word,
                            self.config['word_length_min'],
                            self.config['word_length_max']))
             if shannon_entropy(word) >= self.config['entropy_min']:
                 yield self.create_secret(shannon_entropy(word), word)
Example no. 3
    def max_entropy(inList):
        """returns the maximum shannon entropy of URIs in the list"""
        try:
            maxEntropy = en.shannon_entropy(inList[0])
        except IndexError:
            maxEntropy = en.shannon_entropy(inList)

        for uri in inList:
            if maxEntropy < en.shannon_entropy(uri):
                maxEntropy = en.shannon_entropy(uri)

        return(maxEntropy)
Example no. 4
def decrypt_str(args, binary, len_str, key):
    """Decrypt Spora's config"""

    # Save AES key
    if args.output_dir:
        out_dir = "{:}/{:}/".format(args.output_dir,
                                    os.path.basename(args.file))
        out_path = out_dir + "AES256.key"

        # Check for the output directory
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # Write the decrypted file
        with open(out_path, 'w') as f:
            f.write(key)

    # Decrypt data
    for call in len_str:
        # Init Crypto stuff
        h = SHA256.new()
        c = AES.new(key, AES.MODE_CBC, '\x00' * 16)

        enc_bytes_list = get_bin_bytes(binary, call["str"], call["len"])
        enc_bytes_str = b''.join([chr(i) for i in enc_bytes_list])

        dec_bytes_str = c.decrypt(enc_bytes_str)

        h.update(dec_bytes_str)
        entropy = {
            "enc": shannon_entropy(enc_bytes_str),
            "dec": shannon_entropy(dec_bytes_str)
        }

        # Print file hash and size
        if args.verbose >= 1:
            print "\nFile decrypted SHA256: {:}, size: {:}".format(
                h.hexdigest(), call["len"])

        # Print entropy
        if args.verbose >= 2:
            print "Entropy of {:}: before = {:}, after = {:}".format(
                h.hexdigest(), entropy["enc"], entropy["dec"])
        # Save the decrypted file
        if args.output_dir:
            out_path = out_dir + h.hexdigest()

            # Write the decrypted file
            with open(out_path, 'w') as f:
                f.write(dec_bytes_str)

        if args.print_config:
            print "{:}".format(dec_bytes_str)
Example no. 5
def score_domain(domain):
    """Score `domain`.

    The higher the score, the more likely `domain` is a phishing site.

    Args:
        domain (str): the domain to check.

    Returns:
        int: the score of `domain`.
    """
    score = 0
    for tld in suspicious_tld:
        if domain.endswith(tld):
            score += 20
    for keyword in suspicious_keywords:
        if keyword in domain:
            score += 25
    for keyword in highly_suspicious:
        if keyword in domain:
            score += 60
    score += int(round(entropy.shannon_entropy(domain) * 50))

    # Lots of '-' (ie. www.paypal-datacenter.com-acccount-alert.com)
    if 'xn--' not in domain and domain.count('-') >= 4:
        score += 20

    # Deeply nested subdomains (ie. www.paypal.com.security.accountupdate.gq)
    if domain.count('.') >= 4:
        score += 20
    return score
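
A hypothetical driver for the scorer above. The three word lists are stand-ins invented for illustration, not the project's real data; the point is simply that a keyword-stuffed, hyphen-heavy name accumulates far more points than a plain domain.

import entropy

suspicious_tld = ['.gq', '.tk', '.ml']
suspicious_keywords = ['login', 'account']
highly_suspicious = ['paypal', 'appleid']

print(score_domain('secure-login-paypal-account-update.tk'))  # well over 100
print(score_domain('example.com'))                            # scores only its entropy term
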
Example no. 6
def compute_entropy(region_file: str, tabix_file: str):

    tx = tabix.open(tabix_file)

    with open(region_file) as regions:
        reader = csv.reader(regions, delimiter="\t")
        for region in reader:
            # avoid header line #
            if str(region[0]).startswith("#"):
                print("\t".join(region), "entropy", sep="\t")
            else:
                chromosome = region[0]
                start = int(region[1])
                end = int(region[2])
                size = end - start
                serie = [0] * size

                for record in tx.query(chromosome, start, end):
                    t_start = int(record[1])
                    t_end = int(record[2])
                    index = t_start - start
                    serie[index] += 1

                serie_str = "".join(str(i) for i in serie)
                e = entropy.shannon_entropy(serie_str)

                print("\t".join(region), e, sep="\t")
Example no. 7
 def get_t_wave_durations_entropy(self):
     t_wave_durations = self.get_t_wave_durations().ravel()
     ''' hist, bin_edges = np.histogram(t_wave_durations, 'auto')
     bin_map_t_waves = np.digitize(t_wave_durations, bin_edges[:-1])
     bin_map_t_waves = np.array(list(map(lambda x: hist[x-1]/len(t_wave_durations), bin_map_t_waves))) '''
     #return sps.entropy(bin_map_t_waves, base = 2)
     return shannon_entropy(t_wave_durations)
Example no. 8
def get_entropy_of_file(FilePath):
    file_read = open(FilePath, 'r')
    file_data = file_read.read()

    entropy_file = entropy.shannon_entropy(file_data)
    file_read.close()
    return entropy_file
Example no. 9
 def get_rr_interval_durations_entropy(self):
     p_wave_durations = self.segments.get('rr_interval')
     ''' hist, bin_edges = np.histogram(p_wave_durations, 'auto')
     bin_map_p_waves = np.digitize(p_wave_durations, bin_edges[:-1])
     bin_map_p_waves = np.array(list(map(lambda x: hist[x-1]/len(p_wave_durations), bin_map_p_waves))) '''
     #return sps.entropy(bin_map_p_waves, base = 2)
     return shannon_entropy(p_wave_durations)
Example no. 10
def score_domain(domain):
    """Score `domain`.

    The higher the score, the more likely `domain` is a phishing site.

    Args:
        domain (str): the domain to check.

    Returns:
        int: the score of `domain`.
    """
    score = 0
    for t in suspicious['tlds']:
        if domain.endswith(t):
            score += 20

    # Remove initial '*.' for wildcard certificates bug
    if domain.startswith('*.'):
        domain = domain[2:]

    # Removing TLD to catch inner TLD in subdomain (ie. paypal.com.domain.com)
    try:
        res = get_tld(domain, as_object=True, fail_silently=True, fix_protocol=True)
        domain = '.'.join([res.subdomain, res.domain])
    except Exception:
        pass

    # Higher entropy is kind of suspicious
    score += int(round(entropy.shannon_entropy(domain)*50))

    # Remove lookalike characters using list from http://www.unicode.org/reports/tr39
    domain = unconfuse(domain)

    words_in_domain = re.split(r"\W+", domain)

    # ie. detect fake .com (ie. *.com-account-management.info)
    if words_in_domain[0] in ['com', 'net', 'org']:
        score += 10

    # Testing keywords
    for word in suspicious['keywords']:
        if word in domain:
            score += suspicious['keywords'][word]

    # Testing Levenshtein distance for strong keywords (>= 70 points) (ie. paypol)
    for key in [k for (k,s) in suspicious['keywords'].items() if s >= 70]:
        # Removing too generic keywords (ie. mail.domain.com)
        for word in [w for w in words_in_domain if w not in ['email', 'mail', 'cloud']]:
            if distance(str(word), str(key)) == 1:
                score += 70

    # Lots of '-' (ie. www.paypal-datacenter.com-acccount-alert.com)
    if 'xn--' not in domain and domain.count('-') >= 4:
        score += domain.count('-') * 3

    # Deeply nested subdomains (ie. www.paypal.com.security.accountupdate.gq)
    if domain.count('.') >= 3:
        score += domain.count('.') * 3

    return score
Example no. 11
def subdomain_entropy(hostname: str) -> float:
    domain = get_domain(hostname)
    if pd.isna(domain):
        return np.nan

    subdomain = hostname[:-len(domain)]
    return shannon_entropy(subdomain)
Example no. 12
 def get_pr_interval_durations_entropy(self):
     pr_intervals = self.get_pr_intervals().ravel()
     ''' hist, bin_edges = np.histogram(pr_intervals,'auto')
     bin_map_pr_interval = np.digitize(pr_intervals,bin_edges[:-1])
     bin_map_pr_interval = np.array(list(map(lambda x: hist[x-1]/len(pr_intervals), bin_map_pr_interval))) '''
     #return sps.entropy(bin_map_pr_interval, base = 2)
     return shannon_entropy(pr_intervals)
Example no. 13
def score_domain(domain):
    """Score `domain`.
    The higher the score, the more likely `domain` is a phishing site.
    Args:
        domain (str): the domain to check.
    Returns:
        int: the score of `domain`.
    """
    score = 0
    mult = 1
    for tld in suspicious_tld:
        if domain.endswith(tld):
            score += 20
            mult += 1
    for keyword in suspicious_keywords:
        if keyword in domain:
            score += 25
            mult += 1
    for keyword in highly_suspicious:
        if keyword in domain:
            score += 60
            mult += 1
    score += (int(round(entropy.shannon_entropy(domain)*50))) * mult

    # Lots of '-' (ie. www.paypal-datacenter.com-acccount-alert.com)
    if 'xn--' not in domain and domain.count('-') >= 4:
        score += 20
    return score
Example no. 14
def alleged_domain(phishy):
    """How sketchy is the domain in question?  Performs statistical, semantic, and other reasoning
        techniques to separate the wheat from the chaff.

        input:
        phishy - the domain (str)

        returns - score (int or float, depending on the quantitative techniques)
        """

    score = 0
    for _tld in prefixes:
        if phishy.endswith(_tld):
            score += 20

    if phishy.startswith('*.'):
        phishy = phishy[2:]

    # https://arstechnica.com/information-technology/2017/06/phishing-attacks-target-mobile-browsers-with-dash-padded-urls/
    try:
        res = get_tld(phishy, as_object=True, fail_silently=True, fix_protocol=True)
        phishy = '.'.join([res.subdomain, res.domain])
    except Exception as exc:
        pass

    words_in_domain = re.split(r"\W+", phishy)

    # How fun are wildcards?  Not fun with language parsers
    if phishy.startswith('*.'):
        phishy = phishy[2:]
        if words_in_domain[0] in ['com', 'net', 'org']:
            score += 10

    # Testing keywords
    for word, val in phrases.iteritems():
        if word in phishy:
            score += phrases[word]

    # Too random?
    score += int(round(entropy.shannon_entropy(phishy) * 50.2))

    # How likely is this like others?
    for key in [k for (k, s) in phrases.items() if s >= 70]:
        # Massaging the dataset with normalization
        for word in [w for w in words_in_domain if w not in ['cloud', 'mail', 'email']]:
            if ratio(str(word), str(key)) == 1:
                score += 70

    '''Markov chain confusion Not released to the public'''
    '''K closest neighbors and cluster analysis (similar to Levenshtein ratios) not released to the public'''

    #Oh China....
    if 'xn--' not in phishy and phishy.count('-') >= 4:
        score += phishy.count('-') * 3

    # Humans rarely, rationally pick 3+ subdomains deep
    if phishy.count('.') >= 3:
        score += phishy.count('.') * 3

    return score
Example no. 15
def score_domain(domain):
    """Score `domain`.
    The higher the score, the more likely `domain` is a phishing site.
    Args:
        domain (str): the domain to check.
    Returns:
        int: the score of `domain`.
    #https://github.com/x0rz/phishing_catcher/blob/master/catch_phishing.py
    """
    score = 0
    for t in tlds:
        if domain.endswith(t):
            score += 20

    # Remove initial '*.' for wildcard certificates bug
    if domain.startswith("*."):
        domain = domain[2:]

    # Removing TLD to catch inner TLD in subdomain (ie. paypal.com.domain.com)
    try:
        res = get_tld(domain, as_object=True, fail_silently=True, fix_protocol=True)
        domain = ".".join([res.subdomain, res.domain])
    except:  # noqa: B110
        pass

    words_in_domain = re.split(r"\W+", domain)

    # Remove initial '*.' for wildcard certificates bug
    if domain.startswith("*."):
        domain = domain[2:]
        # ie. detect fake .com (ie. *.com-account-management.info)
        if words_in_domain[0] in ["com", "net", "org"]:
            score += 10

    # Testing keywords
    for word in keywords.keys():
        if word in domain:
            score += keywords[word]

    # Higher entropy is kind of suspicious
    score += int(round(entropy.shannon_entropy(domain) * 50))

    # Testing Levenshtein distance for strong keywords (>= 70 points) (ie. paypol)
    for key in [k for (k, s) in keywords.items() if s >= 70]:
        # Removing too generic keywords (ie. mail.domain.com)
        for word in [w for w in words_in_domain if w not in ["email", "mail", "cloud"]]:
            if distance(str(word), str(key)) == 1:
                score += 70

    # Lots of '-' (ie. www.paypal-datacenter.com-acccount-alert.com)
    if "xn--" not in domain and domain.count("-") >= 4:
        score += domain.count("-") * 3

    # Deeply nested subdomains (ie. www.paypal.com.security.accountupdate.gq)
    if domain.count(".") >= 3:
        score += domain.count(".") * 3

    return score
Example no. 16
 def execute_all_measurements(self, base64_expression_decoded):
     results = dict()
     results['entropy'] = shannon_entropy(base64_expression_decoded)
     results['strings'] = self.words_in_strings(
         base64_expression_decoded, self._word_list,
         int(self.config[self.NAME]['string_min_length']))
     results['filetype'] = get_file_type_from_binary(
         base64_expression_decoded)
     return results
Example no. 17
    def max_entropy(inList):
        """returns the maximum shannon entropy of URIs in the list"""
        try:
            maxEntropy = en.shannon_entropy(inList[0])
        except (IndexError, TypeError, KeyError):
            try:
                maxEntropy = en.shannon_entropy(inList)
            except (TypeError):
                maxEntropy = 0.0

        for uri in inList:
            try:
                if maxEntropy < en.shannon_entropy(uri):
                    maxEntropy = en.shannon_entropy(uri)
            except (IndexError, TypeError, KeyError):
                print()

        return (maxEntropy)
Example no. 18
def score_domain(config, domain, args):
    """ """
    # dbugger = ['------------------------------------------------------------']
    # dbugger.append(domain)
    score = 0
    for t in config["tlds"]:
        if domain.endswith(t):
            score += 20
            # dbugger.append("TLD: {}".format(t))

    try:
        res = get_tld(domain, as_object=True, fail_silently=True, fix_protocol=True)

        if res is not None:
            domain = '.'.join([res.subdomain, res.domain])
    except Exception as err:
        failed_message(args, err, domain)
        pass

    score += int(round(entropy.shannon_entropy(domain)*50))
    # dbugger.append("Entropy: {}".format(int(round(entropy.shannon_entropy(domain)*50))))

    domain          = unconfuse(domain)
    words_in_domain = re.split(r"\W+", domain)

    if words_in_domain[0] in ["com", "net", "org"]:
        score += 10
        # dbugger.append("Com-net-org: {}".format(words_in_domain[0]))

    for word in config["keywords"]:
        if word in domain:
            score += config["keywords"][word]
            # dbugger.append("Keyword: {}".format(len(config["keywords"])))
            # dbugger.append("Keyword: {}".format(word))

    for key in [k for (k,s) in config["keywords"].items() if s >= 70]:
        for word in [w for w in words_in_domain if w not in ["email", "mail", "cloud"]]:
            if distance(str(word), str(key)) == 1:
                score += 70
                # dbugger.append("Distance: {}, {}".format(str(word), str(key)))

    if "xn--" not in domain and domain.count("-") >= 4:
        score += domain.count("-") * 3
        # dbugger.append("Count dashes: {}".format(domain.count(".")))

    if domain.count(".") >= 3:
        score += domain.count(".") * 3
        # dbugger.append("Count period: {}".format(domain.count(".")))

    # dbugger.append("\nScore: {}".format(score))
    # dbugger.append('------------------------------------------------------------')

    # with open("dbug_file", "a") as dbug_file:
    #     for dbug in dbugger:
    #         dbug_file.write("{}\n".format(dbug))
    return score
Example no. 19
 def entropy(segments):
     try:
         ''' hist, bin_edges = np.histogram(segments,'auto')
         bin_map_pr_interval = np.digitize(segments,bin_edges[:-1])
         bin_map_pr_interval = np.array(list(map(lambda x: hist[x-1]/len(segments), bin_map_pr_interval)))
         return sps.entropy(bin_map_pr_interval, base = 2) '''
         return shannon_entropy(segments)
     except Exception as e:
         print(str(e), file = sys.stderr)
         return 0.0
Example no. 20
def plot_scatter(legit, dga):
    legit_len, legit_entropy, dga_len, dga_entropy = [], [], [], []
    for x in legit:
        legit_len.append(len(x))
        legit_entropy.append(entropy.shannon_entropy(x))
    for x in dga:
        dga_len.append(len(x))
        dga_entropy.append(entropy.shannon_entropy(x))
    plt.scatter(legit_len,
                legit_entropy,
                s=140,
                c='#aaaaff',
                label='Legit',
                alpha=.2)
    plt.scatter(dga_len, dga_entropy, s=40, c='r', label='DGA', alpha=.3)
    plt.legend()
    plt.xlabel('Domain Length')
    plt.ylabel('Domain Entropy')
    plt.show()
Example no. 21
 def is_chrome_dn(self, dn):
     dn_segs = dn.split('.')
     num_segs = len(dn_segs)
     alpha_num = 0
     random_len = len(dn_segs[0])
     if random_len == 10 and shannon_entropy(dn_segs[0]) > 0.30:
         for letter in dn_segs[0]:
             if letter in self.ALPHABET:
                 alpha_num += 1
         if alpha_num == random_len:
             return True
     return False
Example no. 22
def gentropy(email, compare=None):
    particle = email.pop('body')

    try:
        ent = entropy.shannon_entropy(particle)
        email['entropy'] = ent

        email['date'] = email['date'].isoformat() #convert to str

    except:
        print "[ERROR] Failed to parse: %s " % (particle)

    return email
Example no. 23
def validate_ssn(ssn):
    """
    Utility function to normalize social security numbers (SSN)

    :param ssn: 
        Type: String
        Default: None
        Description: The SSN to normalize   
        
    :return: 
        Function accepts any SSN string and, if determined to be valid, outputs 
        the SSN in the format 'XXX-XX-XXXX'
        
        If the SSN argument is empty or cannot be matched, None is returned; other invalid values raise ValueError
        
        Invalid SSNs are:
        1) Not equal to 9 numeric digits in length
        2) Equal to known "bad_ssns" values like "123456789"
        3) Numbers with all 0's in any digit group like "000-XX-XXXX" or "XXX-00-XXXX" or "XXX-XX-0000"
        4) Numbers in first digit group between "900" and "999"
        5) Numbers with a Shannon Entropy value <.16 like "111-22-2222"
    """
    if not ssn:
        return None
    bad_ssns = ['123456789']
    numeric_digits = re.compile(r'[^0-9]+')
    ssn_digits = numeric_digits.sub('', ssn)
    if len(ssn_digits) != 9:
        ssn_digits = None
        raise ValueError(
            'The value passed as an SSN was not nine numeric digits in length: {}'
            .format(ssn))
    elif ssn_digits:
        ssn_compile = re.compile(
            r'.*([0-8][0-9]{2}).*([0-9]{2}).*([0-9]{4}).*')
        n_ssn = ssn_compile.match(ssn_digits)
        if n_ssn:
            n_ssn_digits = str('{}{}{}'.format(n_ssn.group(1), n_ssn.group(2),
                                               n_ssn.group(3)))
            if (n_ssn_digits in bad_ssns) or (n_ssn.group(1) in [
                    '666', '000'
            ]) or (n_ssn.group(2) in ['00']) or (n_ssn.group(3) in ['0000']):
                raise ValueError(
                    'An invalid value was supplied as an SSN: {}'.format(ssn))
            elif shannon_entropy(n_ssn_digits) < .16:
                raise ValueError(
                    'The value supplied as an SSN does not pass shannon entropy requirements: {}'
                    .format(ssn))
            else:
                # Return in the documented 'XXX-XX-XXXX' format
                return str('{}-{}-{}'.format(n_ssn.group(1), n_ssn.group(2),
                                             n_ssn.group(3)))
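
A short usage sketch for the validator above, assuming it sits in a module that already imports re and shannon_entropy as shown. '111-22-2222' is the docstring's own example of a value that fails the Shannon-entropy check, so it is used here as the rejected input.

print(validate_ssn('587-60-1234'))   # normalizes cleanly

try:
    validate_ssn('111-22-2222')      # low-entropy digits
except ValueError as err:
    print(err)
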
Example no. 24
def score_domain(domain):
    score = 0
    for tld in tlds:
        if domain.endswith(tld):
            score += 20

    # for wildcard certs, remove *.
    if domain.startswith('*.'):
        domain = domain[2:]

    try:
        res = get_tld(domain,
                      as_object=True,
                      fail_silently=True,
                      fix_protocol=True)
        domain = '.'.join([res.subdomain, res.domain])
    except:
        pass

    words_in_domain = re.split(r"\W+", domain)

    # for wildcard certs, remove *.
    if domain.startswith('*.'):
        domain = domain[2:]
        if words_in_domain[0] in ['com', 'net', 'org']:
            score += 10

    for word in keywords.keys():
        if word in domain:
            score += keywords[word]

    score += int(round(entropy.shannon_entropy(domain) * 50))

    for key in [k for (k, s) in keywords.items() if s >= 70]:
        for word in [
                w for w in words_in_domain
                if w not in ['email', 'mail', 'cloud']
        ]:
            if distance(str(word), str(key)) == 1:
                score += 70

    if 'xn--' not in domain and domain.count('-') >= 4:
        score += domain.count('-') * 3

    if domain.count('.') >= 3:
        score += domain.count('.') * 3

    return score
Example no. 25
    def _enumerate_encrypted_assets(self):
        """Returns a list of files in the APK assets that have high entropy."""
        files = []
        for filename, filetype in self.apk.get_files_types().items():
            if "assets" in filename:
                buf = self.apk.zip.read(filename)
                file_entropy = entropy.shannon_entropy(buf)
                if file_entropy > 0.9:
                    files.append({
                        "name": filename,
                        "entropy": file_entropy,
                        "size": len(buf),
                        "type": filetype,
                    })

        return files
Example no. 26
def score_domain(suspicious, domain, args):
    """ """
    score = 0
    for t in suspicious["tlds"]:
        if domain.endswith(t):
            score += 20

    try:
        res = get_tld(domain,
                      as_object=True,
                      fail_silently=True,
                      fix_protocol=True)

        if res is not None:
            domain = '.'.join([res.subdomain, res.domain])
    except Exception as err:
        failed_message(args, err, domain)
        pass

    score += int(round(entropy.shannon_entropy(domain) * 50))

    domain = unconfuse(domain)
    words_in_domain = re.split(r"\W+", domain)

    if words_in_domain[0] in ["com", "net", "org"]:
        score += 10

    for word in suspicious["keywords"]:
        if word in domain:
            score += suspicious["keywords"][word]

    for key in [k for (k, s) in suspicious["keywords"].items() if s >= 70]:
        for word in [
                w for w in words_in_domain
                if w not in ["email", "mail", "cloud"]
        ]:
            if distance(str(word), str(key)) == 1:
                score += 70

    if "xn--" not in domain and domain.count("-") >= 4:
        score += domain.count("-") * 3

    if domain.count(".") >= 3:
        score += domain.count(".") * 3

    return score
Example no. 27
 def find_encrypted_assets(self, apk):
     """Returns a dict of files in the APK assets that have high entropy."""
     ret = []
     for fname, filetype in apk.get_files_types().items():
         if "assets" in fname:
             if ".png" in fname and "png" in filetype.lower():
                 continue
             buf = apk.zip.read(fname)
             file_entropy = entropy.shannon_entropy(buf)
             if file_entropy > 0.9:
                 ret.append({
                     "name": fname,
                     "entropy": file_entropy,
                     "size": len(buf),
                     "type": filetype,
                 })
     return ret
Example no. 28
def score_domain(domain):
    score = 0
    for tld in suspicious_tld:
        if domain.endswith(tld):
            score += 20
    for keyword in suspicious_keywords:
        if keyword in domain:
            score += 25
    for keyword in highly_suspicious:
        if keyword in domain:
            score += 60
    score += int(round(entropy.shannon_entropy(domain) * 50))

    # Lots of '-' (ie. www.paypal-datacenter.com-acccount-alert.com)
    if 'xn--' not in domain and domain.count('-') >= 4:
        score += 20
    return score
Example no. 29
def score_domain(provided_ioc):
    """Return the scores of the provided domain."""
    score = 0

    for suspicious_tld in suspicious["tlds"]:
        if provided_ioc.endswith(suspicious_tld):
            score += 20

    try:
        res = tld.get_tld(provided_ioc,
                          as_object=True,
                          fail_silently=True,
                          fix_protocol=True)
        domain = ".".join([res.subdomain, res.domain])
    except Exception:
        domain = provided_ioc

    score += int(round(entropy.shannon_entropy(domain) * 50))
    domain = confusables.unconfuse(domain)
    words_in_domain = re.split(r"\W+", domain)

    if domain.startswith("*."):
        domain = domain[2:]

        if words_in_domain[0] in ["com", "net", "org"]:
            score += 10

    for word in suspicious["keywords"]:
        if word in domain:
            score += suspicious["keywords"][word]

    for key in [k for k, v in suspicious["keywords"].items() if v >= 70]:
        for word in [
                w for w in words_in_domain
                if w not in ["email", "mail", "cloud"]
        ]:
            if pylev.levenshtein(str(word), str(key)) == 1:
                score += 70

    if "xn--" not in domain and domain.count("-") >= 4:
        score += domain.count("-") * 3

    if domain.count(".") >= 3:
        score += domain.count(".") * 3
    return score
Example no. 30
def findKeyLen(data, maxKeyLen):
	table = {}
	for keylen in range(1,maxKeyLen):
		entsum = 0
		for i in range(keylen):
			subtable = data[i::keylen]
			entsum += entropy.shannon_entropy(subtable)
		averageent = entsum / keylen
		table[keylen] = averageent
		print "keylen: %02d, average entropy: %f" % (keylen , averageent)
	
	keys = sorted(table, key=table.__getitem__)
	probablekeys = {}
	a = 1000
	for kl in keys:
		if table[kl] < a:
			a = table[kl]
		if table[kl] - a < 0.1:
			probablekeys[kl] = table[kl]
	return sorted(probablekeys)[0]
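
A hypothetical driver for the key-length finder above, written in the same Python 2 style as the function. The plaintext and the 7-byte key are made up for illustration; columns sampled at the true key length are each XORed with a single byte, so their average entropy should dip at 7 and its multiples, which is what findKeyLen looks for.

import entropy

key = "secret!"
plaintext = "The quick brown fox jumps over the lazy dog. " * 40
cipher = "".join(chr(ord(c) ^ ord(key[i % len(key)])) for i, c in enumerate(plaintext))

print findKeyLen(cipher, 32)  # ideally 7, or a small multiple of it
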
Example no. 31
def keywords(data):

    # retweet_count

    d6 = pd.DataFrame()

    # tweet entropy
    l = []
    for i in range(len(data.text)):
        l.append(entropy.shannon_entropy(data.text[i]))

    d6['tweet_entropy'] = pd.Series(l)

    # number of words in tweet
    l = []
    for i in range(len(data.text)):
        l.append(len(pd.Series(data.text[i].split())))

    d6['no_of_words'] = pd.Series(l)

    # length of the tweet (number of characters)
    l = []
    for i in range(len(data.text)):

        l.append(len(data.text[i]))

    d6['tweet_length'] = pd.Series(l)

    d6x = pd.DataFrame({
        'sum': d6.sum().astype('int'),
        'median': d6.median(),
        'mean': d6.mean(),
        'std': d6.std()
    })

    d6x = d6x.round(decimals=3)

    del d6

    return d6x
Example no. 32
    def process(self):
        # print("SECTIONS")
        # logging.debug("loading pefile")
        pelib = self._getLibrary(PEFileModule().getName())
        if(pelib is None):
            return ""

        # logging.debug("iterating sections")
        ret = []
        number = 0

        for section in pelib.sections:
            # print(section)
            dic_sec = {}
            dic_sec["name"] = repr(section.Name)

            dic_sec["size_raw_data"] = int(hex(section.SizeOfRawData), 16)
            dic_sec["virtual_size"] = int(hex(section.Misc_VirtualSize), 16)
            dic_sec["characteristics"] = hex(section.Characteristics)

            if (section.__dict__.get('IMAGE_SCN_MEM_WRITE', False) and
                    section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False)):
                dic_sec["write_executable"] = "True"
            else:
                dic_sec["write_executable"] = "False"

            data = section.get_data()
            # logging.debug("calculating hashes")
            dic_sec["sha1"] = SHA1(data)
            dic_sec["sha2"] = SHA256(data)
            dic_sec["md5"] = MD5(data)
            # logging.debug("calculating fuzzy")
            dic_sec["fuzzy_hash"] = getSsdeep(data)
            dic_sec["entropy"] = entropy.shannon_entropy(data) * 8
            # logging.debug("finished calculating")

            ret.append(dic_sec)

        return ret
Example no. 33
    def get_highentropy_files(self, ent_threshold=0.7):
        """Return list of files with higher entropy (encrypted, compressed)."""
        import entropy

        ignored_mimetypes = [
            "application/x-shockwave-flash", "application/x-font-",
            "application/pdf", "image/"
        ]

        for file in self.filelist:
            with open(file["filename"], "r") as f:
                buff = f.read(1024 * 1024)

                skip = False
                for mime in ignored_mimetypes:
                    if file["mime"].startswith(mime):
                        skip = True
                        break
                if not skip:
                    ent = entropy.shannon_entropy(buff)
                    if ent >= ent_threshold:
                        yield (file, ent)
Example no. 34
def test_gibson_assembly_class():
    """
    Most of the tests here are "data integrity" tests. The structure of the
    GibsonAssembler class has to be done right.
    """
    parts = [seq_generator(500) for i in range(3)]

    g = GibsonAssembler(parts)
    assume(len(set(parts)) > 1)  # make sure no duplicates exist
    for part in parts:
        assume(shannon_entropy(part) > 0.24)

    primer_names = ['fw_gibson', 're_gibson', '3p_sequencing', '5p_sequencing']
    for n, d in g.nodes(data=True):
        assert len(set(d.keys()).intersection(primer_names)) == 4

    assert len(g.nodes()) == len(g.edges())
    assert len(g.nodes()) == len(g.sequences)

    p = g.primers()
    assert len(p) == len(g.nodes())
    for part, primers in p.items():
        assert len(primers) == 4
Example no. 35
"""
Creates the most gibberish possible by compounding word
pieces and sorting the results based on highest entropy.
Inspired by moonbase alpha youtube videos.
"""   

import re
import entropy
import subprocess
import random
from collections import defaultdict

wordlist = open('words.txt','r')
somedict = defaultdict(list)
words = wordlist.readlines()
wordlist = [ word.strip() for word in words if re.search('rur[a-z]',word) ]


for i in range(0,10000):
    e_words = random.sample(wordlist,10)
    e_words = ''.join(e_words)

    cmd = ['/usr/bin/espeak','--stdout',e_words]

    if not cmd:
        continue

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    somedict[e_words] = entropy.shannon_entropy(proc.stdout.read())
    print (somedict[e_words]*100),e_words
Example no. 36
import sys
from sys import argv

import entropy
import pandas as pd

# Capture and parse commandline arguments
script, filename, minlength = argv

# Verify that the MinLength argument is a valid positive integer
try:
    val = int(minlength)
    if int(minlength) < 1:
        sys.exit("ERROR: Minimum domain length needs to be a positive integer")
except ValueError:
    sys.exit("ERROR: Minimum domain length needs to be a positive integer")

# Read in file, using ',' as the field delimiter
df = pd.read_csv(filename, delimiter=",", header=0)

# Extract Domain column (all rows) into a list
dflist = df.iloc[:, 3].tolist()

for item in dflist:
    domain = ""
    try:
        # Split domain out of FQDN and run entropy calculation
        domain, ext = str(item).split(".")[-2:]
        ent = entropy.shannon_entropy(domain)
    except ValueError:
        ent = "0.00"
    # If length of domain string is less than the minimum length, return 0.00
    if len(domain) > int(minlength) - 1:
        print(ent, ",", item, sep="")
    else:
        print("0.00", ",", item, sep="")
Example no. 37
 def assert_entropy(self, data, expected):
     assert_almost_equal(shannon_entropy(data), expected, places=3)
Example no. 38
starttime = time.time()
fivebit.decompress(fivebit.compress("".join(lst)))
print("Execution time: " + str(time.time() - starttime))



#Random gobbledegook words
print("\n\n\nGenerating 50000 gobbledegook random words..")
lst = []
for i in range(50000):	
	lst.append(teststr(random.randrange(1,15),1,255) + " ")

wordlist = "".join(lst)

print("Shannon entropy: " + str(entropy.shannon_entropy(wordlist)))

print("Testing compression time with dictionary enabled")
starttime = time.time()
d = fivebit.compress(wordlist,True)
print("Shannon entropy: " + str(entropy.shannon_entropy(d)))
print("Execution time: " + str(time.time() - starttime))
print("Testing compression time with dictionary disabled")
starttime = time.time()
nd = fivebit.compress(wordlist,False)
print("Execution time: " + str(time.time() - starttime))
print("Testing decompression time..")
starttime = time.time()
dec = fivebit.decompress(d)
print("Execution time: " + str(time.time() - starttime))
print("Uncompressed length: " + str(len(wordlist)) + " Dict compressed length: " + str(len(d)) + " Nodict compressed length: " + str(len(nd)) )
Example no. 39
File: era.py Project: bahusvel/ERA
		# page is not new and has changed
		print("Updating page {} from {} to {}".format(page_number, decrypted_map[page_number], hash.hexdigest()))
		insert_page(output_file, page_number, data)
		decrypted_map[page_number] = hash_digest
	else:
		# page is new
		print("Page {} found {}".format(page_number, hash.hexdigest()))
		insert_page(output_file, page_number, data)
		decrypted_map[page_number] = hash_digest


while True:
	data = input_file.read(4096)
	if len(data) != 4096:
		break
	data_entropy = entropy.shannon_entropy(data)

	if data_entropy <= ENTROPY_THRESHOLD:
		# page is below entropy threshold so it is most likely decrypted
		print("Page {} is not encrypted ({})".format(page_number, data_entropy))
		upsert_page(page_number, data)

	elif page_number in entropy_map and data_entropy < entropy_map[page_number]:
		# page entropy value has decreased, THIS IS QUESTIONABLE but should be better anyway
		print("Entropy for page {} decreased from {} to {}".format(page_number, entropy_map[page_number], data_entropy))
		upsert_page(page_number, data)

	elif page_number not in decrypted_map:
		# if the page is not decrypted yet update it anyway, to avoid false negatives
		insert_page(output_file, page_number, data)
	entropy_map[page_number] = data_entropy
Example no. 40
import entropy

input_file = open("text.crypto", "rb")
data = input_file.read()

print("Shannon entropy is ", entropy.shannon_entropy(data))
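
Judging from the other snippets collected here (the PE-section example multiplies the result by 8 to recover bits per byte, and the APK examples treat anything above 0.9 as "high entropy"), entropy.shannon_entropy appears to return a value normalized to the 0-1 range rather than raw bits. A quick sanity check under that assumption:

import os
import entropy

print(entropy.shannon_entropy("aaaaaaaa"))        # a single repeated symbol -> 0.0
print(entropy.shannon_entropy(os.urandom(4096)))  # random bytes -> close to 1.0
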
Example no. 41
 def process(self):
     res = entropy.shannon_entropy(self.sample.getBinary()) * 8
     return res
Example no. 42
#!/usr/bin/python
import sys, os
if len(sys.argv) < 2: sys.exit("Usage: cryptanalysis.py encrypted.raw")
if not os.path.isfile(sys.argv[1]): sys.exit("File not found")
with open(sys.argv[1], "rb") as f: cipher = f.read()
print '''
[+]----------[ Cryptanalysis by t3h XRUST ]----------------------------------[+]
 |
 + Common Structures:
 |	* Fixed-length data
 |	* Variable-length data with separator chars
 |	* Variable-length data with length fields
 |
 + Common Mistakes:
 |	* Home-grown encryption
 |	* Insecure cipher mode (ECB, CBC, OFB, ...)
 |	* Poor key selection / Insufficient key length / Key reuse
 |	* Insecure random number generator
 |
'''
import entropy
print "+ Entropy:           %s" % entropy.shannon_entropy(cipher)
import collections
freq = collections.Counter(cipher)
print "+ Common Characters: %s" % freq.most_common(5)
length = len(cipher)
print "+ Ciphertext Length: %d bytes" % length
print "|---  8 byte blocks: %d (remainder: %d bytes)" % (length/8, length%8)
print "|--- 16 byte blocks: %d (remainder: %d bytes)" % (length/16, length%16)
print "|\n+" + "-"*40 + "+++"
Example no. 43
import entropy

input_file = open("../python-mem/text.crypto", "rb")
count = 1
while True:
	data = input_file.read(4096)
	if len(data) != 4096:
		break
	print("Page {}:{}".format(count, entropy.shannon_entropy(data)))
	count += 1


Example no. 44
File: simpl.py Project: tuxxy/simpl
 def count_entropy(self, password):
     """ Counts shannon entropy of password."""
     print("Shannon Entropy count: {}".format(entropy.shannon_entropy(password)))