Python shannon_entropy 예제들, entropy.shannon_entropy Python 예제들

예제 #1

0

파일 보기

파일: exploitUriBehaviors.py 프로젝트: Safe3/aktaion2

    def min_entropy(inList):
        """Return the minimum Shannon entropy of the URIs in ``inList``.

        Args:
            inList: Indexable, iterable collection of URIs.

        Returns:
            The smallest value ``en.shannon_entropy`` produces for any element.

        Raises:
            IndexError: If ``inList`` is an empty sequence, because the first
                element seeds the running minimum.
        """
        # Seed the running minimum with the first element.
        minEntropy = en.shannon_entropy(inList[0])

        for uri in inList:
            # Score each URI once; the original scored it for the comparison
            # and again for the assignment.
            uriEntropy = en.shannon_entropy(uri)
            if minEntropy > uriEntropy:
                minEntropy = uriEntropy

        return minEntropy

예제 #2

0

파일 보기

 def __iter__(self):
     """Yield a secret for each word whose length and entropy qualify.

     The context data is split on whitespace. A word is considered when its
     length lies within ``word_length_min``..``word_length_max`` (inclusive)
     and it is yielded, via ``self.create_secret``, when its Shannon entropy
     is at least ``entropy_min``.
     """
     for word in self.context.data.split():
         min_len = self.config['word_length_min']
         max_len = self.config['word_length_max']
         if not min_len <= len(word) <= max_len:
             continue

         # Lazy %-style arguments: the message is only built when DEBUG
         # logging is actually enabled.
         logger.debug(
             "found word (%s) that matched length constraints of min:%s and max:%s",
             word, min_len, max_len)

         # Compute the entropy once and reuse it for the threshold test and
         # for the secret itself.
         word_entropy = shannon_entropy(word)
         if word_entropy >= self.config['entropy_min']:
             yield self.create_secret(word_entropy, word)

예제 #3

0

파일 보기

파일: exploitUriBehaviors.py 프로젝트: Safe3/aktaion2

    def max_entropy(inList):
        """Return the maximum Shannon entropy of the URIs in ``inList``.

        Args:
            inList: Indexable, iterable collection of URIs.

        Returns:
            The largest value ``en.shannon_entropy`` produces for any element.
            When indexing the first element raises ``IndexError`` the entropy
            of ``inList`` itself is used as the starting value.
        """
        try:
            maxEntropy = en.shannon_entropy(inList[0])
        except IndexError:
            # No first element: fall back to scoring the container itself.
            maxEntropy = en.shannon_entropy(inList)

        for uri in inList:
            # Score each URI once instead of twice.
            uriEntropy = en.shannon_entropy(uri)
            if maxEntropy < uriEntropy:
                maxEntropy = uriEntropy

        return maxEntropy

예제 #4

0

파일 보기

파일: spora_config.py 프로젝트: coldshell/spora-config

def decrypt_str(args, binary, len_str, key):
    """Decrypt Spora's config.

    For every entry of ``len_str`` the encrypted bytes are fetched with
    ``get_bin_bytes``, decrypted with AES-CBC (all-zero IV) using ``key``,
    hashed with SHA256 and optionally printed and/or written to disk.

    Args:
        args: Parsed command-line options; ``output_dir``, ``file``,
            ``verbose`` and ``print_config`` are read.
        binary: Handle passed through to ``get_bin_bytes``.
        len_str: Iterable of mappings with ``"str"`` and ``"len"`` entries.
        key: AES key (raw bytes).
    """

    # Save AES key
    if args.output_dir:
        out_dir = "{:}/{:}/".format(args.output_dir,
                                    os.path.basename(args.file))
        out_path = out_dir + "AES256.key"

        # Check for the output directory
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # The key is raw binary data: write it in binary mode so no newline
        # translation or text encoding is applied.
        with open(out_path, 'wb') as f:
            f.write(key)

    # Decrypt data
    for call in len_str:
        # Init Crypto stuff
        h = SHA256.new()
        c = AES.new(key, AES.MODE_CBC, b'\x00' * 16)

        enc_bytes_list = get_bin_bytes(binary, call["str"], call["len"])
        # bytes(bytearray(...)) builds the same byte string as joining
        # chr(i) under Python 2 and also works under Python 3.
        enc_bytes_str = bytes(bytearray(enc_bytes_list))

        dec_bytes_str = c.decrypt(enc_bytes_str)

        h.update(dec_bytes_str)
        entropy = {
            "enc": shannon_entropy(enc_bytes_str),
            "dec": shannon_entropy(dec_bytes_str)
        }

        # Print file hash and size
        if args.verbose >= 1:
            print("\nFile decrypted SHA256: {:}, size: {:}".format(
                h.hexdigest(), call["len"]))

        # Print entropy
        if args.verbose >= 2:
            print("Entropy of {:}: before = {:}, after = {:}".format(
                h.hexdigest(), entropy["enc"], entropy["dec"]))

        # Save the decrypted file
        if args.output_dir:
            out_path = out_dir + h.hexdigest()

            # Decrypted payload is binary as well: binary mode.
            with open(out_path, 'wb') as f:
                f.write(dec_bytes_str)

        if args.print_config:
            print("{:}".format(dec_bytes_str))

예제 #5

0

파일 보기

def score_domain(domain):
    """Compute a phishing-likelihood score for `domain`.

    The higher the score, the more likely `domain` is a phishing site.

    Args:
        domain (str): the domain to check.

    Returns:
        int: the score of `domain`.
    """
    # Fixed points per matching TLD suffix / keyword.
    tld_points = sum(20 for tld in suspicious_tld if domain.endswith(tld))
    keyword_points = sum(
        25 for keyword in suspicious_keywords if keyword in domain)
    strong_points = sum(
        60 for keyword in highly_suspicious if keyword in domain)

    # Entropy contribution, scaled by 50 and rounded to an integer.
    entropy_points = int(round(entropy.shannon_entropy(domain) * 50))
    total = tld_points + keyword_points + strong_points + entropy_points

    # Many dashes (e.g. www.paypal-datacenter.com-acccount-alert.com);
    # domains containing 'xn--' are exempt.
    if domain.count('-') >= 4 and 'xn--' not in domain:
        total += 20

    # Deeply nested subdomains (e.g. www.paypal.com.security.accountupdate.gq)
    if domain.count('.') >= 4:
        total += 20

    return total

예제 #6

0

파일 보기

def compute_entropy(region_file: str, tabix_file: str):
    """Print every region of ``region_file`` with an extra entropy column.

    For each tab-separated region (chromosome, start, end) the tabix index is
    queried, the number of records starting at each position of the region is
    counted, the counts are concatenated into a string and its Shannon
    entropy is printed after the original columns.

    Args:
        region_file: Path of the tab-separated region file. Lines whose first
            field starts with ``#`` are treated as headers.
        tabix_file: Path of the tabix-indexed file to query.
    """
    tx = tabix.open(tabix_file)

    with open(region_file) as regions:
        reader = csv.reader(regions, delimiter="\t")
        for region in reader:
            # Header line: echo it with the name of the new column.
            if str(region[0]).startswith("#"):
                print("\t".join(region), "entropy", sep="\t")
                continue

            chromosome = region[0]
            start = int(region[1])
            end = int(region[2])
            serie = [0] * (end - start)

            for record in tx.query(chromosome, start, end):
                index = int(record[1]) - start
                # A record may start outside the queried window. A negative
                # index would silently wrap around and bump a position at the
                # END of the region, so such records are skipped.
                if 0 <= index < len(serie):
                    serie[index] += 1

            serie_str = "".join(str(i) for i in serie)
            e = entropy.shannon_entropy(serie_str)

            print("\t".join(region), e, sep="\t")

예제 #7

0

파일 보기

파일: SignalProcessor.py 프로젝트: mangoklie/MiniProyecto

 def get_t_wave_durations_entropy(self):
     """Return the Shannon entropy of the T-wave durations.

     The durations come from ``self.get_t_wave_durations()`` and are passed
     through ``ravel()`` before being handed to ``shannon_entropy``.
     """
     # An earlier histogram-based variant (np.histogram / sps.entropy) was
     # kept here only as disabled code; the direct computation is used.
     durations = self.get_t_wave_durations().ravel()
     return shannon_entropy(durations)

예제 #8

0

파일 보기

파일: IndexerLib.py 프로젝트: chrisbvt/ML-MalwareDetection

def get_entropy_of_file(FilePath):
    """Return the Shannon entropy of the contents of the file at ``FilePath``.

    Args:
        FilePath: Path of the file to read (opened in text mode ``'r'``).

    Returns:
        The value of ``entropy.shannon_entropy`` for the file's full content.
    """
    # The context manager closes the handle even when read() raises.
    # NOTE(review): text mode is kept for compatibility; binary samples may
    # need 'rb' under Python 3 - confirm against callers.
    with open(FilePath, 'r') as file_read:
        file_data = file_read.read()

    return entropy.shannon_entropy(file_data)

예제 #9

0

파일 보기

파일: SignalProcessor.py 프로젝트: mangoklie/MiniProyecto

 def get_rr_interval_durations_entropy(self):
     """Return the Shannon entropy of the stored RR-interval segment.

     The value is looked up under the ``'rr_interval'`` key of
     ``self.segments`` and passed to ``shannon_entropy`` unchanged.
     """
     # An earlier histogram-based variant (np.histogram / sps.entropy) was
     # kept here only as disabled code; the direct computation is used.
     rr_intervals = self.segments.get('rr_interval')
     return shannon_entropy(rr_intervals)

예제 #10

0

파일 보기

파일: catch_phishing.py 프로젝트: jibrilha/jh_phishing_sites_catcher

def score_domain(domain):
    """Score `domain`.

    The higher the score, the more likely `domain` is a phishing site.

    Args:
        domain (str): the domain to check.

    Returns:
        int: the score of `domain`.
    """
    score = 0
    for t in suspicious['tlds']:
        if domain.endswith(t):
            score += 20

    # Remove initial '*.' for wildcard certificates bug
    if domain.startswith('*.'):
        domain = domain[2:]

    # Removing TLD to catch inner TLD in subdomain (ie. paypal.com.domain.com)
    try:
        res = get_tld(domain, as_object=True, fail_silently=True, fix_protocol=True)
        domain = '.'.join([res.subdomain, res.domain])
    except Exception:
        # Deliberate best effort: when the TLD cannot be split off, the
        # domain is scored as-is.
        pass

    # Higher entropy is kind of suspicious
    score += int(round(entropy.shannon_entropy(domain)*50))

    # Remove lookalike characters using list from http://www.unicode.org/reports/tr39
    domain = unconfuse(domain)

    # Raw string: "\W" in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    words_in_domain = re.split(r"\W+", domain)

    # ie. detect fake .com (ie. *.com-account-management.info)
    if words_in_domain[0] in ['com', 'net', 'org']:
        score += 10

    # Testing keywords
    for word, points in suspicious['keywords'].items():
        if word in domain:
            score += points

    # Testing Levenshtein distance for strong keywords (>= 70 points) (ie. paypol)
    for key in [k for (k, s) in suspicious['keywords'].items() if s >= 70]:
        # Removing too generic keywords (ie. mail.domain.com)
        for word in [w for w in words_in_domain if w not in ['email', 'mail', 'cloud']]:
            if distance(str(word), str(key)) == 1:
                score += 70

    # Lots of '-' (ie. www.paypal-datacenter.com-acccount-alert.com)
    if 'xn--' not in domain and domain.count('-') >= 4:
        score += domain.count('-') * 3

    # Deeply nested subdomains (ie. www.paypal.com.security.accountupdate.gq)
    if domain.count('.') >= 3:
        score += domain.count('.') * 3

    return score

예제 #11

0

파일 보기

def subdomain_entropy(hostname: str) -> float:
    """Return the Shannon entropy of the part of ``hostname`` before its domain.

    Args:
        hostname: Host name to analyse.

    Returns:
        ``np.nan`` when ``get_domain`` yields a missing value, otherwise the
        Shannon entropy of ``hostname`` with its last ``len(domain)``
        characters removed.
    """
    domain = get_domain(hostname)
    if pd.isna(domain):
        return np.nan

    # Slice by an explicit end offset. ``hostname[:-len(domain)]`` collapses
    # to '' when ``domain`` is empty because -0 == 0. The max() keeps the
    # previous result ('') when the domain is longer than the hostname.
    prefix_len = max(len(hostname) - len(domain), 0)
    subdomain = hostname[:prefix_len]
    return shannon_entropy(subdomain)

예제 #12

0

파일 보기

파일: SignalProcessor.py 프로젝트: mangoklie/MiniProyecto

 def get_pr_interval_durations_entropy(self):
     """Return the Shannon entropy of the PR intervals.

     The intervals come from ``self.get_pr_intervals()`` and are passed
     through ``ravel()`` before being handed to ``shannon_entropy``.
     """
     # An earlier histogram-based variant (np.histogram / sps.entropy) was
     # kept here only as disabled code; the direct computation is used.
     intervals = self.get_pr_intervals().ravel()
     return shannon_entropy(intervals)

예제 #13

0

파일 보기

def score_domain(domain):
    """Compute a phishing-likelihood score for `domain`.

    The higher the score, the more likely `domain` is a phishing site.
    The entropy contribution is multiplied by one plus the number of
    TLD/keyword matches.

    Args:
        domain (str): the domain to check.
    Returns:
        int: the score of `domain`.
    """
    # Count the matches in each category first.
    tld_hits = sum(1 for tld in suspicious_tld if domain.endswith(tld))
    keyword_hits = sum(
        1 for keyword in suspicious_keywords if keyword in domain)
    strong_hits = sum(
        1 for keyword in highly_suspicious if keyword in domain)

    total = 20 * tld_hits + 25 * keyword_hits + 60 * strong_hits
    multiplier = 1 + tld_hits + keyword_hits + strong_hits
    total += int(round(entropy.shannon_entropy(domain) * 50)) * multiplier

    # Many dashes (e.g. www.paypal-datacenter.com-acccount-alert.com);
    # domains containing 'xn--' are exempt.
    if domain.count('-') >= 4 and 'xn--' not in domain:
        total += 20

    return total

예제 #14

0

파일 보기

def alleged_domain(phishy):
    """Score how suspicious the domain ``phishy`` looks.

    Combines suffix matches, keyword matches, Shannon entropy, keyword
    similarity, dash count and subdomain depth into one score.

    Args:
        phishy (str): the domain to score.

    Returns:
        The accumulated score (int, or float if any keyword weight in
        ``phrases`` is a float).
    """

    score = 0
    for _tld in prefixes:
        if phishy.endswith(_tld):
            score += 20

    if phishy.startswith('*.'):
        phishy = phishy[2:]

    # https://arstechnica.com/information-technology/2017/06/phishing-attacks-target-mobile-browsers-with-dash-padded-urls/
    try:
        res = get_tld(phishy, as_object=True, fail_silently=True, fix_protocol=True)
        phishy = '.'.join([res.subdomain, res.domain])
    except Exception:
        # Deliberate best effort: keep the domain unchanged when the TLD
        # cannot be split off.
        pass

    # Raw string: "\W" in a plain literal is an invalid escape sequence.
    words_in_domain = re.split(r"\W+", phishy)

    # NOTE(review): a leading '*.' was already stripped above, so this branch
    # only runs for inputs that still start with '*.' afterwards - confirm
    # whether the com/net/org check was meant to be unconditional.
    if phishy.startswith('*.'):
        phishy = phishy[2:]
        if words_in_domain[0] in ['com', 'net', 'org']:
            score += 10

    # Testing keywords. items() works on both Python 2 and 3.
    for word, val in phrases.items():
        if word in phishy:
            score += val

    # Too random?
    score += int(round(entropy.shannon_entropy(phishy) * 50.2))

    # How likely is this like others?
    for key in [k for (k, s) in phrases.items() if s >= 70]:
        # Ignore overly generic words
        for word in [w for w in words_in_domain if w not in ['cloud', 'mail', 'email']]:
            if ratio(str(word), str(key)) == 1:
                score += 70

    # Markov chain confusion: not released to the public
    # K closest neighbors and cluster analysis (similar to Levenshtein
    # ratios): not released to the public

    # Many dashes; domains containing 'xn--' are exempt
    if 'xn--' not in phishy and phishy.count('-') >= 4:
        score += phishy.count('-') * 3

    # Humans rarely, rationally pick 3+ subdomains deep
    if phishy.count('.') >= 3:
        score += phishy.count('.') * 3

    return score

예제 #15

0

파일 보기

def score_domain(domain):
    """Score `domain`.

    The higher the score, the more likely `domain` is a phishing site.

    Args:
        domain (str): the domain to check.
    Returns:
        int: the score of `domain`.

    Based on
    https://github.com/x0rz/phishing_catcher/blob/master/catch_phishing.py
    """
    score = 0
    for t in tlds:
        if domain.endswith(t):
            score += 20

    # Remove initial '*.' for wildcard certificates bug
    if domain.startswith("*."):
        domain = domain[2:]

    # Removing TLD to catch inner TLD in subdomain (ie. paypal.com.domain.com)
    try:
        res = get_tld(domain, as_object=True, fail_silently=True, fix_protocol=True)
        domain = ".".join([res.subdomain, res.domain])
    except Exception:  # noqa: B110
        # Deliberate best effort, but no longer a bare ``except:`` so
        # KeyboardInterrupt/SystemExit still propagate.
        pass

    # Raw string: "\W" in a plain literal is an invalid escape sequence.
    words_in_domain = re.split(r"\W+", domain)

    # NOTE(review): a leading '*.' was already stripped above, so this branch
    # only runs for inputs that still start with '*.' afterwards - confirm
    # whether the com/net/org check was meant to be unconditional.
    if domain.startswith("*."):
        domain = domain[2:]
        # ie. detect fake .com (ie. *.com-account-management.info)
        if words_in_domain[0] in ["com", "net", "org"]:
            score += 10

    # Testing keywords
    for word, points in keywords.items():
        if word in domain:
            score += points

    # Higher entropy is kind of suspicious
    score += int(round(entropy.shannon_entropy(domain) * 50))

    # Testing Levenshtein distance for strong keywords (>= 70 points) (ie. paypol)
    for key in [k for (k, s) in keywords.items() if s >= 70]:
        # Removing too generic keywords (ie. mail.domain.com)
        for word in [w for w in words_in_domain if w not in ["email", "mail", "cloud"]]:
            if distance(str(word), str(key)) == 1:
                score += 70

    # Lots of '-' (ie. www.paypal-datacenter.com-acccount-alert.com)
    if "xn--" not in domain and domain.count("-") >= 4:
        score += domain.count("-") * 3

    # Deeply nested subdomains (ie. www.paypal.com.security.accountupdate.gq)
    if domain.count(".") >= 3:
        score += domain.count(".") * 3

    return score

예제 #16

0

파일 보기

 def execute_all_measurements(self, base64_expression_decoded):
     """Run every measurement on the decoded payload.

     Returns:
         dict with the keys ``'entropy'`` (Shannon entropy of the payload),
         ``'strings'`` (result of ``self.words_in_strings``) and
         ``'filetype'`` (result of ``get_file_type_from_binary``).
     """
     payload = base64_expression_decoded
     # Entries are evaluated top to bottom, matching the original order.
     return {
         'entropy': shannon_entropy(payload),
         'strings': self.words_in_strings(
             payload,
             self._word_list,
             int(self.config[self.NAME]['string_min_length']),
         ),
         'filetype': get_file_type_from_binary(payload),
     }

예제 #17

0

파일 보기

파일: microbehavior_logic.py 프로젝트: yue123161/ransomware-detection-with-deep-learning

    def max_entropy(inList):
        """Return the maximum Shannon entropy of the URIs in ``inList``.

        Args:
            inList: Collection of URIs.

        Returns:
            The largest entropy found. The starting value is the entropy of
            the first element; if that cannot be computed, the entropy of
            ``inList`` itself; and ``0.0`` if that raises ``TypeError``.
        """
        try:
            maxEntropy = en.shannon_entropy(inList[0])
        except (IndexError, TypeError, KeyError):
            try:
                maxEntropy = en.shannon_entropy(inList)
            except TypeError:
                maxEntropy = 0.0

        for uri in inList:
            try:
                # Score each URI once instead of twice.
                uriEntropy = en.shannon_entropy(uri)
                if maxEntropy < uriEntropy:
                    maxEntropy = uriEntropy
            except (IndexError, TypeError, KeyError):
                # Unscorable entries are skipped; the blank line printed by
                # the original implementation is preserved.
                print()

        return maxEntropy

예제 #18

0

파일 보기

def score_domain(config, domain, args):
    """Compute a suspiciousness score for ``domain``.

    Args:
        config: Mapping with a ``"tlds"`` iterable and a ``"keywords"``
            mapping of keyword -> points.
        domain: Domain name to score.
        args: Passed through to ``failed_message`` when TLD parsing raises.

    Returns:
        The accumulated score.
    """
    # 20 points per matching suffix.
    total = sum(20 for suffix in config["tlds"] if domain.endswith(suffix))

    try:
        parsed = get_tld(domain, as_object=True, fail_silently=True, fix_protocol=True)
        if parsed is not None:
            # Continue with "<subdomain>.<domain>", i.e. without the TLD.
            domain = '.'.join([parsed.subdomain, parsed.domain])
    except Exception as err:
        failed_message(args, err, domain)

    # Entropy contribution, scaled by 50.
    total += int(round(entropy.shannon_entropy(domain)*50))

    domain = unconfuse(domain)
    words = re.split(r"\W+", domain)

    if words[0] in ("com", "net", "org"):
        total += 10

    keyword_points = config["keywords"]
    for keyword in keyword_points:
        if keyword in domain:
            total += keyword_points[keyword]

    # Words at edit distance 1 from a strong (>= 70 points) keyword.
    strong_keywords = [k for (k, pts) in keyword_points.items() if pts >= 70]
    for key in strong_keywords:
        for word in words:
            if word in ("email", "mail", "cloud"):
                continue
            if distance(str(word), str(key)) == 1:
                total += 70

    dashes = domain.count("-")
    if dashes >= 4 and "xn--" not in domain:
        total += dashes * 3

    dots = domain.count(".")
    if dots >= 3:
        total += dots * 3

    return total

예제 #19

0

파일 보기

파일: SignalProcessor.py 프로젝트: mangoklie/MiniProyecto

 def entropy(segments):
     """Return the Shannon entropy of ``segments``, or 0.0 on any error.

     Any exception raised by ``shannon_entropy`` is reported on stderr and
     converted into a result of ``0.0``.
     """
     # An earlier histogram-based variant (np.histogram / sps.entropy) was
     # kept here only as disabled code; the direct computation is used.
     try:
         value = shannon_entropy(segments)
     except Exception as e:
         print(str(e), file = sys.stderr)
         return 0.0
     return value

예제 #20

0

파일 보기

파일: plots.py 프로젝트: FrancoPalau/Deep-Active-Learning

def plot_scatter(legit, dga):
    """Scatter-plot domain length against Shannon entropy for two groups.

    Args:
        legit: Iterable of legitimate domain names.
        dga: Iterable of DGA domain names.
    """
    # One pass per input: (length, entropy) for every name.
    legit_points = [(len(name), entropy.shannon_entropy(name)) for name in legit]
    dga_points = [(len(name), entropy.shannon_entropy(name)) for name in dga]

    legit_len = [length for length, _ in legit_points]
    legit_entropy = [value for _, value in legit_points]
    dga_len = [length for length, _ in dga_points]
    dga_entropy = [value for _, value in dga_points]

    plt.scatter(legit_len, legit_entropy, s=140, c='#aaaaff', label='Legit',
                alpha=.2)
    plt.scatter(dga_len, dga_entropy, s=40, c='r', label='DGA', alpha=.3)
    plt.legend()
    plt.xlabel('Domain Length')
    plt.ylabel('Domain Entropy')
    plt.show()

예제 #21

0

파일 보기

파일: domain_parser.py 프로젝트: lcy17code/lcy17code.github.io

 def is_chrome_dn(self, dn):
     """Tell whether the first label of ``dn`` looks like a random 10-char name.

     Returns True only when the first dot-separated label is exactly 10
     characters long, has a Shannon entropy above 0.30 and consists solely
     of characters from ``self.ALPHABET``.
     """
     first_label = dn.split('.')[0]

     # Length gate first, so the entropy is only computed for candidates.
     if len(first_label) != 10:
         return False
     if not (shannon_entropy(first_label) > 0.30):
         return False

     return all(letter in self.ALPHABET for letter in first_label)

예제 #22

0

파일 보기

파일: tofrom_basic_entropy.py 프로젝트: red-bin/entropystuff

def gentropy(email, compare=None):
    """Replace an email's body with its Shannon entropy.

    Args:
        email: Mutable mapping with at least ``'body'`` and ``'date'``
            entries; it is modified in place.
        compare: Unused.

    Returns:
        The same ``email`` mapping. ``'body'`` is always removed; on success
        ``'entropy'`` is set and ``'date'`` is replaced by its
        ``isoformat()`` string. On failure an error line is printed.
    """
    particle = email.pop('body')

    try:
        ent = entropy.shannon_entropy(particle)
        email['entropy'] = ent

        email['date'] = email['date'].isoformat()  # convert to str

    except Exception:
        # ``except Exception`` lets KeyboardInterrupt/SystemExit propagate.
        # The one-element tuple keeps %-formatting correct when the body is
        # itself a tuple.
        print("[ERROR] Failed to parse: %s " % (particle,))

    return email

예제 #23

0

파일 보기

파일: demographics.py 프로젝트: josephneumann/fhir_server

def validate_ssn(ssn):
    """
    Utility function to normalize social security numbers (SSN)

    :param ssn:
        Type: String
        Default: None
        Description: The SSN to normalize

    :return:
        For a valid SSN, the nine digits as a string without separators
        (e.g. '078051120').

        None is returned when ``ssn`` is empty/None, or when the first digit
        group lies between "900" and "999".

    :raises ValueError:
        1) The value does not contain exactly 9 numeric digits
        2) The digits equal a known "bad_ssns" value like "123456789"
        3) The first digit group is "000" or "666", the second is "00" or
           the third is "0000"
        4) The digits have a Shannon Entropy value <.16
    """
    if not ssn:
        return None
    bad_ssns = ['123456789']

    # Strip everything that is not a digit.
    ssn_digits = re.sub(r'[^0-9]+', '', ssn)
    if len(ssn_digits) != 9:
        raise ValueError(
            'The value passed as an SSN was not nine numeric digits in length: {}'
            .format(ssn))

    # Exactly nine digits at this point; the first group must not start
    # with 9 (area numbers 900-999).
    n_ssn = re.match(r'([0-8][0-9]{2})([0-9]{2})([0-9]{4})', ssn_digits)
    if not n_ssn:
        return None

    area, group, serial = n_ssn.groups()
    n_ssn_digits = '{}{}{}'.format(area, group, serial)

    if (n_ssn_digits in bad_ssns or area in ('666', '000')
            or group == '00' or serial == '0000'):
        # The original message had no placeholder, so the offending value
        # was silently dropped from the error text.
        raise ValueError(
            'An invalid value was supplied as an SSN: {}'.format(ssn))
    if shannon_entropy(n_ssn_digits) < .16:
        raise ValueError(
            'The value supplied as an SSN does not pass shannon entropy requirements: {}'
            .format(ssn))
    return n_ssn_digits

예제 #24

0

파일 보기

def score_domain(domain):
    """Compute a suspiciousness score for ``domain``.

    Args:
        domain (str): the domain to check.

    Returns:
        The accumulated score; higher means more suspicious.
    """
    score = 0
    for tld in tlds:
        if domain.endswith(tld):
            score += 20

    # for wildcard certs, remove *.
    if domain.startswith('*.'):
        domain = domain[2:]

    try:
        # The keyword was misspelled ``fail_silentyl``. The resulting
        # TypeError was swallowed below, so the TLD was never stripped.
        res = get_tld(domain,
                      as_object=True,
                      fail_silently=True,
                      fix_protocol=True)
        # With fail_silently=True an unparsable domain is expected to give
        # None rather than an exception.
        if res is not None:
            domain = '.'.join([res.subdomain, res.domain])
    except Exception:
        # Deliberate best effort: score the domain unchanged.
        pass

    # Raw string: "\W" in a plain literal is an invalid escape sequence.
    words_in_domain = re.split(r"\W+", domain)

    # NOTE(review): a leading '*.' was already stripped above, so this branch
    # only runs for inputs that still start with '*.' afterwards - confirm
    # whether the com/net/org check was meant to be unconditional.
    if domain.startswith('*.'):
        domain = domain[2:]
        if words_in_domain[0] in ['com', 'net', 'org']:
            score += 10

    for word, points in keywords.items():
        if word in domain:
            score += points

    score += int(round(entropy.shannon_entropy(domain) * 50))

    # Words at edit distance 1 from a strong (>= 70 points) keyword.
    for key in [k for (k, s) in keywords.items() if s >= 70]:
        for word in [
                w for w in words_in_domain
                if w not in ['email', 'mail', 'cloud']
        ]:
            if distance(str(word), str(key)) == 1:
                score += 70

    if 'xn--' not in domain and domain.count('-') >= 4:
        score += domain.count('-') * 3

    if domain.count('.') >= 3:
        score += domain.count('.') * 3

    return score

예제 #25

0

파일 보기

파일: static.py 프로젝트: muhzii/cuckoo

    def _enumerate_encrypted_assets(self):
        """Return a list describing high-entropy files in the APK assets.

        Every file whose name contains "assets" is read from the APK zip.
        Those with a Shannon entropy above 0.9 are reported as dicts with
        the keys "name", "entropy", "size" and "type".
        """
        flagged = []
        for path, kind in self.apk.get_files_types().items():
            if "assets" not in path:
                continue

            data = self.apk.zip.read(path)
            score = entropy.shannon_entropy(data)
            if score > 0.9:
                flagged.append(
                    dict(name=path, entropy=score, size=len(data), type=kind))

        return flagged

예제 #26

0

파일 보기

파일: commons.py 프로젝트: ooo777/analyst_arsenal

def score_domain(suspicious, domain, args):
    """Compute a suspiciousness score for ``domain``.

    Args:
        suspicious: Mapping with a ``"tlds"`` iterable and a ``"keywords"``
            mapping of keyword -> points.
        domain: Domain name to score.
        args: Passed through to ``failed_message`` when TLD parsing raises.

    Returns:
        The accumulated score.
    """
    # 20 points per matching suffix.
    total = sum(20 for suffix in suspicious["tlds"] if domain.endswith(suffix))

    try:
        parsed = get_tld(domain,
                         as_object=True,
                         fail_silently=True,
                         fix_protocol=True)
        if parsed is not None:
            # Continue with "<subdomain>.<domain>", i.e. without the TLD.
            domain = '.'.join([parsed.subdomain, parsed.domain])
    except Exception as err:
        failed_message(args, err, domain)

    # Entropy contribution, scaled by 50.
    total += int(round(entropy.shannon_entropy(domain) * 50))

    domain = unconfuse(domain)
    words = re.split(r"\W+", domain)

    if words[0] in ("com", "net", "org"):
        total += 10

    keyword_points = suspicious["keywords"]
    for keyword in keyword_points:
        if keyword in domain:
            total += keyword_points[keyword]

    # Words at edit distance 1 from a strong (>= 70 points) keyword.
    strong_keywords = [k for (k, pts) in keyword_points.items() if pts >= 70]
    for key in strong_keywords:
        for word in words:
            if word in ("email", "mail", "cloud"):
                continue
            if distance(str(word), str(key)) == 1:
                total += 70

    dashes = domain.count("-")
    if dashes >= 4 and "xn--" not in domain:
        total += dashes * 3

    dots = domain.count(".")
    if dots >= 3:
        total += dots * 3

    return total

예제 #27

0

파일 보기

 def find_encrypted_assets(self, apk):
     """Return a list describing high-entropy files in the APK assets.

     Files whose name contains "assets" are read from ``apk.zip``; entries
     whose name contains ".png" and whose type mentions "png" are skipped.
     Files with a Shannon entropy above 0.9 are reported as dicts with the
     keys "name", "entropy", "size" and "type".
     """
     flagged = []
     for path, kind in apk.get_files_types().items():
         if "assets" not in path:
             continue
         if ".png" in path and "png" in kind.lower():
             continue

         data = apk.zip.read(path)
         score = entropy.shannon_entropy(data)
         if score > 0.9:
             flagged.append(
                 dict(name=path, entropy=score, size=len(data), type=kind))
     return flagged

예제 #28

0

파일 보기

def score_domain(domain):
    """Compute a phishing-likelihood score for ``domain``.

    Adds fixed points per matching suspicious TLD and keyword, the Shannon
    entropy scaled by 50, and a bonus for domains with many dashes.
    """
    tld_points = sum(20 for tld in suspicious_tld if domain.endswith(tld))
    keyword_points = sum(
        25 for keyword in suspicious_keywords if keyword in domain)
    strong_points = sum(
        60 for keyword in highly_suspicious if keyword in domain)
    entropy_points = int(round(entropy.shannon_entropy(domain) * 50))

    total = tld_points + keyword_points + strong_points + entropy_points

    # Many dashes (e.g. www.paypal-datacenter.com-acccount-alert.com);
    # domains containing 'xn--' are exempt.
    if domain.count('-') >= 4 and 'xn--' not in domain:
        total += 20
    return total

예제 #29

0

파일 보기

def score_domain(provided_ioc):
    """Return the score of the provided domain.

    Args:
        provided_ioc (str): the domain to score.

    Returns:
        The accumulated score; higher means more suspicious.
    """
    score = 0

    for suspicious_tld in suspicious["tlds"]:
        if provided_ioc.endswith(suspicious_tld):
            score += 20

    try:
        res = tld.get_tld(provided_ioc,
                          as_object=True,
                          fail_silently=True,
                          fix_protocol=True)
        domain = ".".join([res.subdomain, res.domain])
    except Exception:
        # TLD could not be split off: score the value unchanged.
        domain = provided_ioc

    score += int(round(entropy.shannon_entropy(domain) * 50))
    domain = confusables.unconfuse(domain)
    # Raw string: "\W" in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    words_in_domain = re.split(r"\W+", domain)

    if domain.startswith("*."):
        domain = domain[2:]

        if words_in_domain[0] in ["com", "net", "org"]:
            score += 10

    for word, points in suspicious["keywords"].items():
        if word in domain:
            score += points

    # Words at edit distance 1 from a strong (>= 70 points) keyword.
    for key in [k for k, v in suspicious["keywords"].items() if v >= 70]:
        for word in [
                w for w in words_in_domain
                if w not in ["email", "mail", "cloud"]
        ]:
            if pylev.levenshtein(str(word), str(key)) == 1:
                score += 70

    if "xn--" not in domain and domain.count("-") >= 4:
        score += domain.count("-") * 3

    if domain.count(".") >= 3:
        score += domain.count(".") * 3
    return score

예제 #30

0

파일 보기

파일: vige_xor_cracker.py 프로젝트: Vigeant/xorcrack

def findKeyLen(data, maxKeyLen):
    """Guess the XOR key length of ``data`` from per-column entropy.

    For every candidate length from 1 up to (but not including)
    ``maxKeyLen`` the data is split into that many interleaved columns and
    the average Shannon entropy of the columns is computed and printed.

    Args:
        data: Sliceable sequence to analyse.
        maxKeyLen: Exclusive upper bound of the key lengths to try.

    Returns:
        The smallest key length whose average entropy is within 0.1 of the
        lowest average found.

    Raises:
        IndexError: If no key length was tried (``maxKeyLen`` <= 1).
    """
    table = {}
    for keylen in range(1, maxKeyLen):
        entsum = 0
        for i in range(keylen):
            # Every keylen-th byte starting at offset i shares one key byte.
            entsum += entropy.shannon_entropy(data[i::keylen])
        averageent = entsum / keylen
        table[keylen] = averageent
        # Single parenthesised argument: valid on both Python 2 and 3.
        print("keylen: %02d, average entropy: %f" % (keylen, averageent))

    # Lowest average entropy, taken directly instead of via a sorted scan
    # seeded with a magic sentinel.
    lowest = min(table.values()) if table else 0.0
    probablekeys = sorted(kl for kl in table if table[kl] - lowest < 0.1)
    return probablekeys[0]

예제 #31

0

파일 보기

파일: semantics.py 프로젝트: x0rzkov/twintel

def keywords(data):
    """Summarise per-tweet entropy, word count and length.

    Args:
        data: Object whose ``text`` attribute is an iterable of tweet
            strings.

    Returns:
        pandas.DataFrame indexed by ``tweet_entropy``, ``no_of_words`` and
        ``tweet_length`` with the columns ``sum``, ``median``, ``mean`` and
        ``std``, rounded to 3 decimals.
    """
    # Iterate the values directly. Indexing with range(len(...)) performs
    # label lookups on a Series and fails for a non-default index.
    texts = list(data.text)

    d6 = pd.DataFrame()

    # tweet entropy
    d6['tweet_entropy'] = pd.Series(
        [entropy.shannon_entropy(text) for text in texts])

    # number of whitespace-separated words in tweet
    d6['no_of_words'] = pd.Series([len(text.split()) for text in texts])

    # tweet length in characters
    d6['tweet_length'] = pd.Series([len(text) for text in texts])

    d6x = pd.DataFrame({
        'sum': d6.sum().astype('int'),
        'median': d6.median(),
        'mean': d6.mean(),
        'std': d6.std()
    })

    return d6x.round(decimals=3)

예제 #32

0

파일 보기

파일: SectionsPlug.py 프로젝트: codexgigassys/codex-backend

    def process(self):
        """Collect per-section metadata of the loaded PE file.

        Returns:
            ``""`` when the PE library object is unavailable, otherwise a
            list with one dict per section holding its name, sizes,
            characteristics, a write+execute flag (as the strings "True" or
            "False"), hashes, fuzzy hash and entropy (Shannon entropy
            multiplied by 8).
        """
        pelib = self._getLibrary(PEFileModule().getName())
        if pelib is None:
            return ""

        ret = []

        for section in pelib.sections:
            dic_sec = {}
            dic_sec["name"] = repr(section.Name)

            # int() replaces the int(hex(x), 16) round-trip, which gives the
            # same value for integers.
            dic_sec["size_raw_data"] = int(section.SizeOfRawData)
            dic_sec["virtual_size"] = int(section.Misc_VirtualSize)
            dic_sec["characteristics"] = hex(section.Characteristics)

            # Flag sections that are both writable and executable.
            if (section.__dict__.get('IMAGE_SCN_MEM_WRITE', False) and
                    section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False)):
                dic_sec["write_executable"] = "True"
            else:
                dic_sec["write_executable"] = "False"

            data = section.get_data()
            dic_sec["sha1"] = SHA1(data)
            dic_sec["sha2"] = SHA256(data)
            dic_sec["md5"] = MD5(data)
            dic_sec["fuzzy_hash"] = getSsdeep(data)
            dic_sec["entropy"] = entropy.shannon_entropy(data) * 8

            ret.append(dic_sec)

        return ret

예제 #33

0

파일 보기

    def get_highentropy_files(self, ent_threshold=0.7):
        """Yield files with high entropy (encrypted, compressed).

        Args:
            ent_threshold: Minimum Shannon entropy for a file to be reported.

        Yields:
            ``(file, entropy)`` tuples, where ``file`` is the entry from
            ``self.filelist``. Only the first MiB of each file is examined,
            and files whose mime type starts with an ignored prefix are
            skipped.
        """
        import entropy

        # str.startswith accepts a tuple of prefixes.
        ignored_mimetypes = (
            "application/x-shockwave-flash", "application/x-font-",
            "application/pdf", "image/"
        )

        for entry in self.filelist:
            # Filter on mime type BEFORE touching the disk; the original
            # read up to 1 MiB of every file, including the ignored ones.
            if entry["mime"].startswith(ignored_mimetypes):
                continue

            # Binary mode: the candidates are arbitrary, often non-text
            # files, which text mode may fail to decode or may alter.
            with open(entry["filename"], "rb") as f:
                buff = f.read(1024 * 1024)

            ent = entropy.shannon_entropy(buff)
            if ent >= ent_threshold:
                yield (entry, ent)

예제 #34

0

파일 보기

파일: test_assembly.py 프로젝트: ericmjl/mbtools

def test_gibson_assembly_class():
    """
    Structural ("data integrity") checks for GibsonAssembler.

    Builds an assembler from three generated 500-long parts and verifies that
    every node carries all four primer attributes and that the node, edge,
    sequence and primer counts agree with each other.
    """
    parts = [seq_generator(500) for _ in range(3)]

    g = GibsonAssembler(parts)
    # Discard examples with duplicate parts or with low-entropy parts.
    assume(len(set(parts)) > 1)
    for part in parts:
        assume(shannon_entropy(part) > 0.24)

    primer_names = ['fw_gibson', 're_gibson', '3p_sequencing', '5p_sequencing']
    for _, attrs in g.nodes(data=True):
        shared = set(attrs.keys()).intersection(primer_names)
        assert len(shared) == 4

    node_count = len(g.nodes())
    assert node_count == len(g.edges())
    assert len(g.nodes()) == len(g.sequences)

    p = g.primers()
    assert len(p) == len(g.nodes())
    for primers in p.values():
        assert len(primers) == 4

예제 #35

0

파일 보기

파일: random_espeak.py 프로젝트: red-bin/entropystuff

"""
Creates the most gibberish possible by compounding word
pieces and sorting the results based on highest entropy.
Inspired by moonbase alpha youtube videos.
"""   

import re
import entropy
import subprocess
import random
from collections import defaultdict

# Load the word list, keeping only words that contain "rur" followed by a
# lowercase letter. The with-block closes the file handle, which the previous
# version leaked by rebinding the name that held it.
with open('words.txt', 'r') as words_file:
    words = words_file.readlines()
somedict = defaultdict(list)
wordlist = [word.strip() for word in words if re.search('rur[a-z]', word)]


for i in range(0, 10000):
    # Glue ten randomly chosen words together into one gibberish string.
    e_words = ''.join(random.sample(wordlist, 10))

    # Argument list (no shell) so the text is passed to espeak verbatim.
    cmd = ['/usr/bin/espeak', '--stdout', e_words]

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # communicate() reads all of stdout AND waits for the child to exit, so
    # finished espeak processes are reaped instead of piling up.
    audio, _ = proc.communicate()

    somedict[e_words] = entropy.shannon_entropy(audio)
    # Same "<value> <words>" line as before, in a form that prints
    # identically under Python 2 and Python 3.
    print("%s %s" % (somedict[e_words] * 100, e_words))

예제 #36

0

파일 보기

파일: dns-entropy1.py 프로젝트: dsplice/projects

# Unpack the command line: script name, input CSV path, minimum domain length
script, filename, minlength = argv

# The minimum length must parse as an integer and be at least 1
try:
    min_len = int(minlength)
except ValueError:
    sys.exit("ERROR: Minimum domain length needs to be a positive integer")
if min_len < 1:
    sys.exit("ERROR: Minimum domain length needs to be a positive integer")

# Load the comma-delimited file; its first row is the header
df = pd.read_csv(filename, delimiter=",", header=0)

# Pull the fourth column (index 3, the domain column) out as a plain list
dflist = df.iloc[:, 3].tolist()

for item in dflist:
    domain = ""
    try:
        # Take the last two dot-separated labels: the domain and its extension.
        # Fewer than two labels makes the unpacking raise ValueError.
        labels = str(item).split(".")[-2:]
        domain, ext = labels
        ent = entropy.shannon_entropy(domain)
    except ValueError:
        ent = "0.00"
    # Domains shorter than the minimum length are reported as 0.00
    shown = ent if len(domain) >= min_len else "0.00"
    print(shown, ",", item, sep="")

예제 #37

0

파일 보기

파일: test_entropy.py 프로젝트: armbues/python-entropy

 def assert_entropy(self, data, expected):
     """Assert that shannon_entropy(data) matches `expected` to 3 places."""
     actual = shannon_entropy(data)
     assert_almost_equal(actual, expected, places=3)

예제 #38

0

파일 보기

파일: ft.py 프로젝트: Dankoozie/fivebit

# Time a full compress/decompress round trip of the list built earlier.
starttime = time.time()
fivebit.decompress(fivebit.compress("".join(lst)))
print("Execution time: " + str(time.time() - starttime))



#Random gobbledegook words
print("\n\n\nGenerating 50000 gobbledegook random words..")
lst = [teststr(random.randrange(1,15),1,255) + " " for _ in range(50000)]

wordlist = "".join(lst)

print("Shannon entropy: " + str(entropy.shannon_entropy(wordlist)))

print("Testing compression time with dictionary enabled")
starttime = time.time()
d = fivebit.compress(wordlist,True)
# Stop the clock before computing the entropy of the result: previously the
# entropy calculation and its print were included in the reported time.
elapsed = time.time() - starttime
print("Shannon entropy: " + str(entropy.shannon_entropy(d)))
print("Execution time: " + str(elapsed))

print("Testing compression time with dictionary disabled")
starttime = time.time()
nd = fivebit.compress(wordlist,False)
print("Execution time: " + str(time.time() - starttime))

print("Testing decompression time..")
starttime = time.time()
dec = fivebit.decompress(d)
print("Execution time: " + str(time.time() - starttime))

# Summarize the raw size against both compressed sizes.
print("Uncompressed length: " + str(len(wordlist)) + " Dict compressed length: " + str(len(d)) + " Nodict compressed length: " + str(len(nd)) )

예제 #39

0

파일 보기

파일: era.py 프로젝트: bahusvel/ERA

		# page is not new and has changed
		print("Updating page {} to {}".format(page_number, decrypted_map[page_number], hash.hexdigest()))
		insert_page(output_file, page_number, data)
		decrypted_map[page_number] = hash_digest
	else:
		# page is new
		print("Page {} found {}".format(page_number, hash.hexdigest()))
		insert_page(output_file, page_number, data)
		decrypted_map[page_number] = hash_digest


# Scan the input one 4096-byte page at a time; the first short (or empty)
# read ends the scan, so a trailing partial page is never processed.
# NOTE(review): page_number is not advanced anywhere in the visible loop
# body -- confirm it is updated elsewhere.
while True:
	data = input_file.read(4096)
	if len(data) != 4096:
		break
	data_entropy = entropy.shannon_entropy(data)

	if data_entropy <= ENTROPY_THRESHOLD:
		# page is below entropy threshold so it is most likely decrypted
		print("Page {} is not encrypted ({})".format(page_number, data_entropy))
		upsert_page(page_number, data)

	elif page_number in entropy_map and data_entropy < entropy_map[page_number]:
		# page entropy value has decreased, THIS IS QUESTIONABLE but should be better anyway
		print("Entropy for page {} decreased from {} to {}".format(page_number, entropy_map[page_number], data_entropy))
		upsert_page(page_number, data)

	elif page_number not in decrypted_map:
		# if the page is not decrypted yet update it anyway, to avoid false negatives
		insert_page(output_file, page_number, data)
	# Always record the latest entropy seen for this page, whichever branch ran.
	entropy_map[page_number] = data_entropy

예제 #40

0

파일 보기

파일: entropy_test.py 프로젝트: bahusvel/ERA

import entropy

# Read the whole file as raw bytes; the with-block guarantees the handle is
# closed, which the bare open() call never did.
with open("text.crypto", "rb") as input_file:
    data = input_file.read()

# Report the Shannon entropy of the file contents.
print("Shannon entropy is ", entropy.shannon_entropy(data))

예제 #41

0

파일 보기

파일: EntropyPlug.py 프로젝트: Bijaye/codex-backend

 def process(self):
     """Return the Shannon entropy of the sample's binary, multiplied by 8."""
     binary = self.sample.getBinary()
     return entropy.shannon_entropy(binary) * 8

예제 #42

0

파일 보기

파일: cryptanalysis.py 프로젝트: adeptex/crypto

#!/usr/bin/python
# Prints a short cryptanalysis report for a ciphertext file: entropy, most
# common values, total length and how it divides into 8- and 16-byte blocks.
# NOTE: uses print statements, i.e. Python 2 syntax.
import sys, os
# Require the ciphertext path as the first argument and make sure it exists.
if len(sys.argv) < 2: sys.exit("Usage: cryptanalysis.py encrypted.raw")
if not os.path.isfile(sys.argv[1]): sys.exit("File not found")
# Read the whole ciphertext in binary mode.
with open(sys.argv[1], "rb") as f: cipher = f.read()
# Static banner listing common structures and common mistakes to look for.
print '''
[+]----------[ Cryptanalysis by t3h XRUST ]----------------------------------[+]
 |
 + Common Structures:
 |	* Fixed-length data
 |	* Variable-length data with separator chars
 |	* Variable-length data with length fields
 |
 + Common Mistakes:
 |	* Home-grown encryption
 |	* Insecure cipher mode (ECB, CBC, OFB, ...)
 |	* Poor key selection / Insufficient key length / Key reuse
 |	* Insecure random number generator
 |
'''
import entropy
print "+ Entropy:           %s" % entropy.shannon_entropy(cipher)
import collections
# Count how often each element of the ciphertext occurs; show the top five.
freq = collections.Counter(cipher)
print "+ Common Characters: %s" % freq.most_common(5)
length = len(cipher)
print "+ Ciphertext Length: %d bytes" % length
# Whole blocks and leftover bytes for 8- and 16-byte block sizes
# (%d formats the quotient as an integer).
print "|---  8 byte blocks: %d (remainder: %d bytes)" % (length/8, length%8)
print "|--- 16 byte blocks: %d (remainder: %d bytes)" % (length/16, length%16)
print "|\n+" + "-"*40 + "+++"

예제 #43

0

파일 보기

파일: entropy_per_page.py 프로젝트: bahusvel/ERA

import entropy

# Print the Shannon entropy of every full 4096-byte page of the input file.
# The with-block closes the handle when the scan ends, which the bare open()
# call never did.
count = 1
with open("../python-mem/text.crypto", "rb") as input_file:
	while True:
		data = input_file.read(4096)
		# A short (or empty) read means no full page is left; stop here.
		if len(data) != 4096:
			break
		print("Page {}:{}".format(count, entropy.shannon_entropy(data)))
		count += 1

예제 #44

0

파일 보기

파일: simpl.py 프로젝트: tuxxy/simpl

 def count_entropy(self, password):
     """Print the Shannon entropy computed for `password`."""
     score = entropy.shannon_entropy(password)
     print("Shannon Entropy count: {}".format(score))