Example #1
0
    def gen_ssdeep_hash(self, filepath, exclude=False):
        """Compute an ssdeep hash for every regular file in *filepath*.

        Depending on ``self.cluster_type`` the hash covers the printable
        strings of the file (``strings_ssdeep``), the raw bytes
        (``file_ssdeep``) or the import/export function names
        (``imp_exp_ssdeep``).  Each hash is written to a numbered file under
        ``self.tmpdir`` and recorded in ``self.ssdeep_stats`` keyed by hash.

        NOTE(review): entries are joined by plain string concatenation, so
        *filepath* must end with a path separator — confirm callers.

        :param filepath: directory to scan (trailing separator expected).
        :param exclude: stored as 1/0 on each record for later filtering.
        """
        files = os.listdir(filepath)
        for file in files:
            # Sub-directories and special files are skipped with a warning.
            if not os.path.isfile(filepath + file):
                print "[+] WARNING: %s is not a file and will not analysis it. " % (
                    filepath + file)
                continue
            tmp_ssdeep_hash = ''
            if self.cluster_type == 'strings_ssdeep':
                # Hash the printable strings, not the raw bytes.
                data = os.popen('strings %s' % (filepath + file)).read()

                tmp_ssdeep_hash = ssdeep.hash(data)
            elif self.cluster_type == 'file_ssdeep':
                tmp_ssdeep_hash = ssdeep.hash_from_file(filepath + file)

            elif self.cluster_type == 'imp_exp_ssdeep':
                imp_exp_str = imp_exp_functions(filepath + file)
                if imp_exp_str:
                    tmp_ssdeep_hash = ssdeep.hash(imp_exp_str)

            # Files that produced no hash (e.g. no imports/exports) are
            # silently dropped.
            if tmp_ssdeep_hash:
                dst_file = self.tmpdir + str(self.count)
                f = open(dst_file, 'w')
                f.write(tmp_ssdeep_hash)
                f.close()
                self.count += 1
                if tmp_ssdeep_hash not in self.ssdeep_stats.keys():
                    self.ssdeep_stats[tmp_ssdeep_hash] = []
                tmp_file_ssdeep = {}
                tmp_file_ssdeep['file_path'] = filepath + file
                tmp_file_ssdeep['file_md5'] = file_md5(filepath + file)
                tmp_file_ssdeep['cluster_type'] = self.cluster_type
                tmp_file_ssdeep['exclude'] = 1 if exclude else 0
                self.ssdeep_stats[tmp_ssdeep_hash].append(tmp_file_ssdeep)
Example #2
0
 def _calc_score(self, lt_new):
     """Score *lt_new* against every template in ``self._lttable``.

     When ``self._mem_hash`` is set, per-template ssdeep hashes are cached
     in ``self._d_hash`` (built lazily on first call) and compared with
     ``ssdeep.compare``; otherwise ``hash_score`` recomputes the score from
     the raw strings each time.

     :param lt_new: new log template; its ``str()`` form is hashed.
     :return: list of ``(ltid, score)`` tuples, one per template.
     :raises ImportError: when the ssdeep package is unavailable.
     """
     try:
         import ssdeep
     except ImportError:
         raise ImportError(
             "ltgroup algorithm <ssdeep> needs python package ssdeep")
     scores = []
     new_hash = ssdeep.hash(str(lt_new))
     if not self._mem_hash:
         for template in self._lttable:
             scores.append((template.ltid,
                            hash_score(str(lt_new), str(template))))
         return scores
     if not self._d_hash:
         # Lazily populate the hash cache on first use.
         for lt in self._lttable:
             self._d_hash[lt.ltid] = ssdeep.hash(str(lt))
     for idx, template in enumerate(self._lttable):
         cached_hash = self._d_hash[template.ltid]
         scores.append((idx, ssdeep.compare(new_hash, cached_hash)))
     # Remember the new template's hash for subsequent calls.
     self._d_hash[lt_new.ltid] = new_hash
     return scores
Example #3
0
def e_ssdeep(path, file_):
    """Compute ssdeep hashes for a PE file: the whole image and the section
    containing the entry point.

    :param path: path to the PE file on disk.
    :param file_: value stored on the resulting record (foreign key).
    :return: a ``database.SSDEEP`` record, or None when no section contains
        the entry point (an error is logged in that case).
    """
    pe = pefile.PE(path)
    whole_file = ssdeep.hash(pe.__data__[::])
    ep = pe.OPTIONAL_HEADER.AddressOfEntryPoint

    ep_section = None

    for section in pe.sections:
        # Some sections report a zero virtual size; fall back to raw size.
        size = section.Misc_VirtualSize
        if size == 0:
            size = section.SizeOfRawData

        if ep >= section.VirtualAddress and ep < section.VirtualAddress + size:
            ep_section = ssdeep.hash(pe.get_data(section.VirtualAddress))
            break

    if not ep_section:
        # Typo fixed: "fine" -> "find".
        logging.error("Couldn't find EP section in {}".format(path))
    else:
        return database.SSDEEP(file=file_,
                               whole_file=whole_file,
                               ep_section=ep_section)
Example #4
0
 def _calc_score(self, lt_new):
     """Score *lt_new* against every template in ``self._lttable``.

     With ``self._mem_hash`` enabled, ssdeep hashes are cached per template
     id in ``self._d_hash`` (built lazily) and compared via
     ``ssdeep.compare``; otherwise ``hash_score`` works on the raw strings.

     :param lt_new: new log template; its ``str()`` form is hashed.
     :return: list of ``(ltid, score)`` tuples, one per template.
     :raises ImportError: when the ssdeep package is unavailable.
     """
     try:
         import ssdeep
     except ImportError:
         raise ImportError(
                 "ltgroup algorithm <ssdeep> needs python package ssdeep")
     ret = []
     h1 = ssdeep.hash(str(lt_new))
     if self._mem_hash:
         if len(self._d_hash) == 0:
             # initialize d_hash
             for lt in self._lttable:
                 h = ssdeep.hash(str(lt))
                 self._d_hash[lt.ltid] = h
         for ltid, lt_temp in enumerate(self._lttable):
             h2 = self._d_hash[lt_temp.ltid]
             score = ssdeep.compare(h1, h2)
             ret.append((ltid, score))
         # Cache the new template's hash for later calls.
         self._d_hash[lt_new.ltid] = h1
     else:
         for lt_temp in self._lttable:
             ltid = lt_temp.ltid
             score = hash_score(str(lt_new), str(lt_temp))
             ret.append((ltid, score))
     return ret
Example #5
0
def simindex(sign, web, size):
    """Greedy best-match similarity index between two item lists.

    Builds a |sign| x |web| table of pairwise similarity scores (items are
    fuzzy-hashed first when ``size == 'H'``), then repeatedly takes each
    row's maximum and removes the matched column from the remaining rows.

    NOTE(review): relies on Python 2 semantics — ``map`` must return a list
    for ``len``/``max``/``del`` below to work; confirm before porting.

    :return: sum of the chosen maxima divided by ``len(sign)``.
    """
    if size == 'H':
        sign1 = map(lambda x: ssdeep.hash(str(x)), sign)
        web1 = map(lambda x: ssdeep.hash(str(x)), web)
    else:
        sign1 = map(lambda x: str(x), sign)
        web1 = map(lambda x: str(x), web)

    table = []

    for s in sign1:

        table.append(map(lambda x: similarity.compare(x, s), web1))

    # Pad short rows with zeros so every row has len(sign1) columns.
    if len(web1) < len(sign1):

        table = map(lambda x: x + [0] * (len(sign1) - len(web1)), table)

    maxi = 0
    for t in range(0, len(table)):

        #print map(lambda x: len(x), table)
        m = max(table[t])
        maxi += m
        i = table[t].index(m)

        # Remove the chosen column from subsequent rows (greedy matching).
        for k in range(t+1, len(table)):
            
            del table[k][i]

    return maxi*1.0/len(sign)
    def Detect_macro_script(self):
        """Extract the embedded DefaultJScript macro from an HWP file and
        collect IOC / ssdeep information about it.

        The (optionally zlib-compressed) stream is split at the first '/'
        byte: everything before it is treated as the variable section,
        everything after as the macro source.  NUL bytes are dropped from
        both.

        :return: ``self.macro_parser_result_dic`` with keys
            ip/email/url/variable_ssdeep/macro_ssdeep — the string 'None'
            for each when the macro matches the empty default stub.
        """
        distinction = True
        self.macro_parser_result_dic = {}
        try:
            script_data = self.hwp_info.openstream(
                'Scripts/DefaultJScript').read()

            data = zlib.decompress(script_data, -15)
        except Exception:
            # Stream may be stored uncompressed; fall back to raw bytes.
            # (Was a bare except, which also swallowed KeyboardInterrupt.)
            data = script_data
        for value in data:
            if eq(hex(value), '0x2f'):
                # '/' marks the boundary between variables and macro code.
                distinction = False
            if distinction and not eq(hex(value), '0x0'):
                self.variable_source.append(chr(value))
            if not distinction and not eq(hex(value), '0x0'):
                self.macro_source.append(chr(value))

        # Strip the first char and the 4-char trailer (slice == the old
        # range(1, len-4) join).
        variable_result = ''.join(
            self.variable_source[1:len(self.variable_source) - 4])
        macro_result = ''.join(self.macro_source[1:len(self.macro_source) - 4])

        variable_ssdeep = ssdeep.hash(variable_result)
        macro_ssdeep = ssdeep.hash(macro_result)

        # Macro differs from the empty default stub.
        # Original used '&', which binds tighter than '!=' and made this
        # branch unreachable.  NOTE(review): confirm 'and' is the intended
        # combinator for "default macro" (47 chars, OnDocument_New stub).
        if (len(macro_result) != 47
                and macro_result.find('function OnDocument_New()') == -1):
            ip_match = re.compile(Regular_IP, re.MULTILINE)
            email_match = re.compile(Regular_email, re.MULTILINE)
            url_match = re.compile(Regular_url, re.MULTILINE)

            # Search the actual macro text (original searched the literal
            # string "macro_result").
            self.macro_parser_result_dic['ip'] = ip_match.findall(
                macro_result)
            self.macro_parser_result_dic['email'] = email_match.findall(
                macro_result)
            self.macro_parser_result_dic['url'] = url_match.findall(
                macro_result)
            self.macro_parser_result_dic['variable_ssdeep'] = variable_ssdeep
            self.macro_parser_result_dic['macro_ssdeep'] = macro_ssdeep

        # Macro identical to the default stub.
        else:
            self.macro_parser_result_dic['variable_ssdeep'] = 'None'
            self.macro_parser_result_dic['macro_ssdeep'] = 'None'
            self.macro_parser_result_dic['ip'] = 'None'
            self.macro_parser_result_dic['email'] = 'None'
            self.macro_parser_result_dic['url'] = 'None'

        return self.macro_parser_result_dic
Example #7
0
    def testComputeHash(self):
        """ssdeep.hash must reproduce the known digests; note a single
        character's case change yields a different hash."""
        cases = (
            ("Also called fuzzy hashes, Ctph can match inputs that have homologies.",
             "3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C"),
            ("Also called fuzzy hashes, CTPH can match inputs that have homologies.",
             "3:AXGBicFlIHBGcL6wCrFQEv:AXGH6xLsr2C"),
        )
        for text, expected in cases:
            self.assertEqual(ssdeep.hash(text), expected)
Example #8
0
def main():
    """CLI entry point: parse an ActiveMime/MIME document, print its hashes
    and payload metadata, optionally extract the decoded project file.

    :return: 0 on success, None when the input file does not exist.
    """

    parser = argparse.ArgumentParser(description="Scan  document for embedded objects.")
    parser.add_argument("file", help="File to process.")
    parser.add_argument('--extract', dest='extract', action='store_true', help="Extract ActiveMime Objects.")

    args = parser.parse_args()

    print 'ActiveMime Helper'
    print '-----------------'
    print ' [*] Loading file....%s ' % args.file

    if isfile(args.file):
    	tmpf = open(args.file, 'rb')
    	args.file = tmpf.read()
        # args.file now holds the file CONTENTS, not the path.
        amd = None
        if ActiveMimeDoc.is_activemime(args.file):
            amd = ActiveMimeDoc(args.file, ActiveMimeDoc.is_base64(args.file))

        else:
            # Not ActiveMime — try parsing it as a plain MIME document.
            print formatmsg(' [*] File is not an ActiveMime Document', 'yellow')
            print " [*] Parsing as MIME Document"
            amd = process(args.file, False)

        if amd:
            print ' ------------------------------------------------------'
            print '  ActiveMime Document'
            print '   - {:18}{}'.format('Size:', len(amd.rawdoc))
            print '   - {:18}{}'.format('Hash:', hashlib.sha1(amd.rawdoc).hexdigest())

            if __has_ssdeep:
                print '   - {:18}{}'.format('ssdeep:', ssdeep.hash(amd.rawdoc))

            print '  Payload Data'
            print '   - {:18}{}'.format('Compressed Size:',amd.compressed_size)
            print '   - {:18}{}'.format('Size:', amd.size)
            print '   - {:18}{}'.format('Hash:', hashlib.sha1(amd.data).hexdigest())

            if __has_ssdeep:
                print '   - {:18}{}'.format('Data ssdeep:', ssdeep.hash(amd.data))

            print '   - {:18}{}'.format('VBA Tail:', amd.has_vba_tail)
            print '   - {:18}{}'.format('OLE Doc:', amd.is_ole_doc)
            print ' ------------------------------------------------------'

            if args.extract:
                # Output filename is the MD5 of the decoded payload.
                print ' [*] Writing decoded Project file'
                with open(hashlib.md5(amd.data).hexdigest(), 'wb') as out:
                    out.write(amd.data)
        return 0

    else:
        print formatmsg(' [!] File does not exist...exiting', 'red')
        return
Example #9
0
def file_similarity(file1, file2):
    """Average ssdeep similarity between the function snippets of two
    decompiler-output files.

    Each file is parsed according to the decompiler that produced it
    (jadx/cfr use the CFR parser, jdcore the JD-GUI parser).  Functions with
    matching names (compared up to the opening " {" when present) are
    fuzzy-hashed and their similarity scores summed.

    :return: total score divided by the larger function count; 0 when
        neither file yields any functions.  (The original raised
        ZeroDivisionError on that input, and NameError for unknown folder
        categories.)
    """
    file1_cat = folder_category(file1)
    file2_cat = folder_category(file2)

    # Default to empty lists so unknown categories don't leave names unbound.
    file1_codelist = []
    file2_codelist = []

    if file1_cat == "jadx":
        file1_codelist = parser.parser_cfr(file1)
    elif file1_cat == "cfr":
        file1_codelist = parser.parser_cfr(file1)
    elif file1_cat == "jdcore":
        file1_codelist = parser.parser_jdgui(file1)

    if file2_cat == "jadx":
        file2_codelist = parser.parser_cfr(file2)
    elif file2_cat == "cfr":
        file2_codelist = parser.parser_cfr(file2)
    elif file2_cat == "jdcore":
        file2_codelist = parser.parser_jdgui(file2)

    all_code = max(len(file1_codelist), len(file2_codelist))
    if all_code == 0:
        return 0

    all_score = 0

    for file1_code in file1_codelist:
        for file2_code in file2_codelist:
            # Compare names only, stripping the body after " {" when both
            # snippets have one.
            if " {" not in file1_code or " {" not in file2_code:
                names_match = isSamefunc(file1_code, file2_code)
            else:
                names_match = isSamefunc(file1_code[:file1_code.index(" {")],
                                         file2_code[:file2_code.index(" {")])
            if names_match:
                h1 = ssdeep.hash(parser.trim(file1_code))
                h2 = ssdeep.hash(parser.trim(file2_code))
                all_score += ssdeep.compare(h1, h2)

    return all_score / all_code
def ssdeepcompare(target, IP):
    """Fetch the same site via its hostname and via a candidate IP (with a
    Host header) and print the ssdeep similarity of the two response bodies.

    Used to check whether *IP* actually serves *target*'s content.
    Connection failures are reported and swallowed.
    """
    try:
        ss_target = requests.get('http://{}/'.format(target))
        ssdeep_target_fuzz = ssdeep.hash(ss_target.text)
        print target, ssdeep_target_fuzz
        # Hit the raw IP directly, spoofing the Host header; cert checks are
        # disabled since the IP won't match the certificate.
        content = requests.get('https://{}'.format(IP),
                               verify=False,
                               timeout=5,
                               headers={'Host': target})
        ssdeep_fuzz = ssdeep.hash(content.text)
        print IP, ssdeep_fuzz
        print "ssdeep score for", IP, "is", ssdeep.compare(
            ssdeep_target_fuzz, ssdeep_fuzz)
    except (requests.exceptions.ConnectionError):
        print "cant connect to", IP
Example #11
0
def getPerFunctionHash():
	"""
	Iterate over the program's functions (via ``Functions()``; presumably an
	IDAPython script — confirm) and compute, for each:
	 - MD5 of the cleaned-up function code
	 - ssdeep fuzzy hash of the same bytes

	When the module-level ``dump`` flag is set, the table is also pickled to
	./<dumpdir>/hashes.pickle.

	:return: dict mapping function -> {"md5": ..., "ssdeep": ...}
	"""
	functions = Functions()
	hashes = {}
	for function in functions:
		funcCode = getFunctionCode(function)
		funcCode = cleanUpCode(function, funcCode)
		ssdeepstr = ssdeep.hash(funcCode)
		md5str = md5.new(funcCode).hexdigest()
		#lsh = LSHash(512, len(funcCode))
		#lsh.index(funcCode)
		# TODO ADD OTHER TYPE OF HASHES
		hashes[function] = {
			"md5" : md5str,
			"ssdeep" : ssdeepstr,
		}
		if debug:
			print "sub_%08x %s %s" % (function, md5str, ssdeepstr) # DEBUG

	if dump: # save hash table in dump mode
		fd = open("./%s/%s.pickle" % (dumpdir, "hashes"), "w")
		pickle.dump(hashes, fd)
		fd.close()
	return hashes
Example #12
0
File: utils.py Project: hypawn/mazu
def compute_ssdeep(fp):
    """Return the ssdeep fuzzy hash of a file-like object's contents.

    :param fp: object exposing ``read()``.
    :return: ssdeep hash string, or None when *fp* has no ``read`` method
        (best-effort behaviour preserved).
    """
    try:
        contents = fp.read()
    except AttributeError:
        return None
    return ssdeep.hash(contents)
Example #13
0
    def calc_hash(cls, pe_data):
        """Return the ssdeep fuzzy hash of *pe_data*.

        :param pe_data: raw PE file bytes.
        :return: ssdeep hash string, or the sentinel string
            "Unable to calc ssdeep" when hashing fails for any reason.
        """
        try:
            fuzzy_result = ssdeep.hash(pe_data)
        except Exception:
            # Was a bare except, which also swallowed KeyboardInterrupt /
            # SystemExit; narrowed to Exception, same fallback behaviour.
            fuzzy_result = "Unable to calc ssdeep"

        return fuzzy_result
Example #14
0
    def build_sample(self, data, url=None):
        """Assemble a metadata record for a captured sample.

        :param data: raw sample bytes; falsy input yields None.
        :param url: optional source URL recorded on the sample.
        :return: dict with type, hashes and base64 payload, or None when the
            sample is empty or of unknown type.
        """
        if not data:
            return None

        sample = dict()
        sample["type"] = self.get_sample_type(data)
        if sample["type"] is None:
            return None

        sample["md5"] = hashlib.md5(data).hexdigest()
        sample["sha1"] = hashlib.sha1(data).hexdigest()

        if SSDEEP:
            # Fuzzy hash only when the ssdeep binding is available.
            sample["ssdeep"] = ssdeep.hash(data)

        if sample["type"] in ("PE",):
            imphash = self.get_imphash(data)
            if imphash:
                sample["imphash"] = imphash

        if url:
            sample["url"] = url

        sample["data"] = base64.b64encode(data)

        return sample
Example #15
0
def getfuzzyhash():
    """Return the ssdeep fuzzy hash of a spam message.

    The hash covers body + subject.  ssdeep is unreliable on very short
    inputs, so bodies under 150 characters (or missing entirely) are padded
    with ``randomText``.  Also records body+subject length in
    ``mailFields['len']`` for the comparison-ratio logic downstream.
    """
    subject = mailFields['subject']
    if mailFields['html']:
        body = mailFields['html']
    elif mailFields['text']:
        body = mailFields['text']
    else:
        body = None

    if body is None:
        # No body at all: hash subject + sender, padded.
        data = subject + mailFields['from'] + randomText
        mailFields['len'] = len(subject)
    else:
        data = body + " " + subject
        if len(body) < 150:
            # Pad short messages — ssdeep struggles with tiny inputs.
            data += randomText
        mailFields['len'] = len(body) + len(subject)

    return ssdeep.hash(data)
Example #16
0
def should_parse(repo, state):
    """Decide whether a search hit is worth parsing.

    Rejects hits from ignored users/repos/files, and hits whose contents
    fuzzy-match a known-bad signature above SIMILARITY_THRESHOLD.

    :return: True to parse, False to skip (reason printed in red).
    """
    def _reject(reason):
        print(bcolors.FAIL + reason + bcolors.ENDC)
        return False

    if repo.repository.owner.login in state.bad_users:
        return _reject("Failed check: Ignore User")
    if repo.repository.name in state.bad_repos:
        return _reject("Failed check: Ignore Repo")
    if repo.name in state.bad_files:
        return _reject("Failed check: Ignore File")

    # Fuzzy Hash Comparison
    try:
        candidate_sig = ssdeep.hash(repo.decoded_content)
        for sig in state.bad_signatures:
            similarity = ssdeep.compare(candidate_sig, sig)
            if similarity > SIMILARITY_THRESHOLD:
                return _reject("Failed check: Ignore Fuzzy Signature on Contents "
                               "({}% Similarity)".format(similarity))
    except github.UnknownObjectException:
        return _reject("API Error: File no longer exists on github.com")
    return True
Example #17
0
    def fingerprints(data):
        """Return the fingerprints of *data*: md5, sha1, sha256, sha512 and
        ssdeep, in that order, as a tuple of strings."""
        md5 = hashlib.md5(data).hexdigest()
        sha1 = hashlib.sha1(data).hexdigest()
        sha256 = hashlib.sha256(data).hexdigest()
        sha512 = hashlib.sha512(data).hexdigest()

        # ssdeep fuzzy hash (non-cryptographic, for similarity matching)
        ssdeep_ = ssdeep.hash(data)

        return md5, sha1, sha256, sha512, ssdeep_
def get_hash(path):
    """Return the ssdeep fuzzy hash of the text file at *path*.

    Lines are stripped and concatenated (all newlines/surrounding whitespace
    removed) before hashing, matching the original behaviour.

    The original leaked the file handle and built the string with quadratic
    ``+=``; fixed with a context manager and ``str.join``.
    """
    with open(path, 'r', encoding='utf-8') as f:
        codes = "".join(line.strip() for line in f)
    return ssdeep.hash(codes)
    def extract_sections_privileges(self):
        """Collect per-section metadata from ``self.pe``.

        :return: dict keyed by section name, each value holding entropy,
            md5/ssdeep hashes of the section data, raw offset,
            characteristics flags (hex, no 0x prefix) and sizes.  Sections
            whose fields cannot be read are skipped.
        """
        section_dict = {}

        for section in self.pe.sections:
            try:
                # Section names are NUL-padded; keep the part before the
                # first NUL.
                section_name = section.Name.decode().split('\x00')[0]
                raw_data = section.get_data()  # fetch once, hash twice
                entropy = section.get_entropy()
                hash_ssdeep = ssdeep.hash(raw_data)
                hash_md5 = hashlib.md5(raw_data).hexdigest().upper()
                offset = hex(section.PointerToRawData)
                character = hex(section.Characteristics)[2:]
                virtual_address = section.VirtualAddress
                virtual_size = section.Misc_VirtualSize
                raw_size = section.SizeOfRawData
            except Exception:
                # Malformed sections are skipped rather than aborting the
                # whole scan.  (Was a bare except, which also swallowed
                # KeyboardInterrupt/SystemExit.)
                continue

            section_dict[section_name] = {
                'section_name': section_name,
                'entropy': entropy,
                'virtual_address': virtual_address,
                'virtual_size': virtual_size,
                'raw_size': raw_size,
                'hash_md5': hash_md5,
                'hash_ssdeep': hash_ssdeep,
                'offset': offset,
                'character': character
            }

        return section_dict
Example #20
0
def get_spam_level(player, message_content):
    """Return a spam level for a message by fuzzy-hashing it against the
    player's recent message hashes; > 50% similarity suggests spam.

    Side effects: appends the new hash to ``player.last_message_hashes`` and
    increments ``player.spam_detections`` when the level exceeds
    SPAM_TOLERANCE.  Any ssdeep failure degrades to level 0.

    :return: highest ssdeep similarity (0-100) against recent messages.
    """

    try:
        message_hash = ssdeep.hash(message_content)
        spam_level = 0
        spam_levels = [ssdeep.compare(message_hash, prior_hash) for prior_hash in player.last_message_hashes if
                       prior_hash is not None]
        if len(spam_levels) > 0:
            spam_level = max(spam_levels)
        player.last_message_hashes.append(message_hash)
        if spam_level > SPAM_TOLERANCE:
            player.spam_detections += 1
            try:
                util.logger.info("Message '" + message_content + "' detected as SPAM!")
                util.logger.info("spam_level was " + str(spam_level) + ".")
                util.logger.info("Player " + player.name + " now has " + str(player.spam_detections) + " spam detections.")
            except Exception as e:
                util.logger.info("Failed to print spam detection debug message because " + str(e) + ".")
    except Exception:
        # Was a bare except; narrowed so Ctrl-C/SystemExit still propagate.
        spam_level = 0  # if ssdeep doesn't work
        try:
            util.logger.info("ssdeep failed to process message!")
            util.logger.info("Message was '" + message_content + "' by player " + player.name + ".")
            util.logger.info("BTW, player " + player.name + " has " + str(player.spam_detections) + " spam detections.")
        except Exception as e:
            util.logger.info("Failed to print ssdeep failure debug message because " + str(e) + ".")
    return spam_level
Example #21
0
def lambda_handler(event, context):
    r'''S3-triggered Lambda: fuzzy-hash the uploaded object and, when the
    hash is not known-bad (isBadHash), copy it to the destination bucket
    (per the original note, for downstream ghiro digital forensics).

    :param event: S3 put-event record; first record's bucket/key are used.
    :param context: Lambda context (unused).
    :raises: re-raises any S3 or hashing error after printing a diagnostic.
    '''
    srcbucket = event['Records'][0]['s3']['bucket']['name']
    # S3 keys arrive URL-encoded in the event payload.
    key = urllib.unquote_plus(
        event['Records'][0]['s3']['object']['key'].encode('utf8'))
    try:
        response = s3.get_object(Bucket=srcbucket, Key=key)
    except Exception as e:
        print(
            'Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'
            .format(key, srcbucket))
        raise e
    try:
        hashval = ssdeep.hash(response['Body'].read())
    except Exception as e:
        print('Error occurred while computing hash of the file {}', key)
        raise e

    try:
        # Only propagate objects whose fuzzy hash is not on the bad list.
        if isBadHash(hashval) == False:
            s3.copy_object(Bucket=dstbucket,
                           CopySource={
                               'Bucket': srcbucket,
                               'Key': key
                           },
                           Key=key)
    except Exception as e:
        print('Error occurred while comparing hash of the file {}'.format(key))
        raise e
Example #22
0
    def getFileProperties(self, filename,fc):
        """Compute identifying properties of a file's contents.

        :param filename: original file name (stored, also used for exiftool
            lookup relative to ``self.incoming``).
        :param fc: raw file contents (bytes).
        :return: ``{'_id': sha1, 'id': {...}}`` with size, md5/sha1/sha256,
            ssdeep, libmagic description, exiftool file type and a tag list.

        NOTE(review): if an IOError occurs mid-way the partial ``fp`` dict is
        still returned and the 'sha1' lookup may raise KeyError — confirm
        callers tolerate this.
        """
        self.logger.info("Getting file ID")
        fp={'filename':filename}
        try:
            #File size
            fp['size']=len(fc)
            #MD5
            m=hashlib.md5()
            m.update(fc)
            fp['md5']=m.hexdigest()
            #SHA1
            m=hashlib.sha1()
            m.update(fc)
            fp['sha1']=m.hexdigest()
            #SHA256
            m=hashlib.sha256()
            m.update(fc)
            fp['sha256']=m.hexdigest()
            #SSDEEP
            fp['ssdeep']=ssdeep.hash(fc)
            #Magic
            fp['magic']=magic.from_buffer(fc)
            #Exiftool
            #NOTE: exiftool shits itself on certian formats, wipe it's ass someday
            fp['filetype']=self.et.get_tag('FileType',self.incoming+filename)
            #Tag with the first word of the magic description.
            # (Was `is not ''` — an identity check that happened to work by
            # CPython interning; replaced with a proper equality test.)
            if fp['magic'] != '':
                fp['tags']=[fp['magic'].split()[0].lower()]
            else:
                fp['tags']=[]

        except IOError as e:
            self.logger.error("IO Error", exc_info=True)

        return {'_id':fp['sha1'],'id':fp}
Example #23
0
def fingerprints(data):
    """Return the fingerprints of *data*.

    :param data: raw bytes to hash.
    :return: tuple of hex digests (md5, sha1, sha256, sha512) plus the
        ssdeep fuzzy hash, in that order.
    """
    # md5
    md5 = hashlib.md5()
    md5.update(data)
    md5 = md5.hexdigest()

    # sha1
    sha1 = hashlib.sha1()
    sha1.update(data)
    sha1 = sha1.hexdigest()

    # sha256
    sha256 = hashlib.sha256()
    sha256.update(data)
    sha256 = sha256.hexdigest()

    # sha512
    sha512 = hashlib.sha512()
    sha512.update(data)
    sha512 = sha512.hexdigest()

    # ssdeep (fuzzy hash, for similarity matching)
    ssdeep_ = ssdeep.hash(data)

    return md5, sha1, sha256, sha512, ssdeep_
Example #24
0
def info_file(path):
    """Print MD5/SHA1/SHA256/SHA512, ssdeep and TLSH digests of *path*.

    The cryptographic hashes are fed incrementally.  ssdeep and TLSH need
    the whole content, so chunks are accumulated and hashed once after the
    read loop — the original hashed only the LAST chunk and raised
    NameError for empty files.
    """
    BUF_SIZE = 65536
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    sha512 = hashlib.sha512()
    content = bytearray()

    with open(path, 'rb') as f:
        while True:
            data = f.read(BUF_SIZE)
            if not data:
                break
            md5.update(data)
            sha1.update(data)
            sha256.update(data)
            sha512.update(data)
            content.extend(data)

    SSDEEP = ssdeep.hash(bytes(content))
    TLSH = tlsh.hash(bytes(content))

    print("MD5: {0}".format(md5.hexdigest()))
    print("SHA1: {0}".format(sha1.hexdigest()))
    print("SHA256: {0}".format(sha256.hexdigest()))
    print("SHA512: {0}".format(sha512.hexdigest()))
    print("SSDEEP: {0}".format(SSDEEP))
    print("TLSH: {0}".format(TLSH))
def make_request(url, quiet=False, raw_results=False):
    """Fetch *url* (no redirects, 90s timeout) and return
    ``(status_code, ssdeep_hash_of_body)`` — or, with ``raw_results=True``,
    ``(status_code, hex_encoded_body)``.

    NOTE(review): the conditional applies only to the second tuple element
    (Python 2 — uses ``str.encode('hex')`` and a print statement).
    """
    if not quiet:
        print "Requesting {0}".format(url)
    r = requests.get(url, allow_redirects=False, timeout=90)
    content = r.content
    return r.status_code, ssdeep.hash(
        content) if not raw_results else content.encode('hex')
Example #26
0
def log_data(url, resp, now, loot_dir):
    """Persist an HTTP response body plus a JSON metadata record under
    *loot_dir*.

    The body goes to ``<loot_dir><netloc>/<path-or-'index'><sha256>`` and a
    companion ``<same path>.json`` records hashes (sha256/md5/ssdeep),
    status code and whether the page contains forms.

    NOTE(review): ``open(..., 'a')`` does not create parent directories —
    confirm a caller creates ``<loot_dir><netloc>/`` first.

    :return: the metadata payload dict.
    """
    page_data = resp.text
    page_data_binary = page_data.encode()
    sha2 = hashlib.sha256(page_data_binary).hexdigest()
    parsed = urlparse(url)
    # Root URLs get an 'index' pseudo-filename; others append the sha256 to
    # the request path to keep captures unique.
    if parsed.path == '' or parsed.path == '/':
        full_path = loot_dir + (parsed.netloc + '/') + ('index' + sha2)
    else:
        full_path = loot_dir + (parsed.netloc + '/') + (parsed.path + sha2)
    with open(full_path, 'a') as fd:
        _LOGGER.info('Writing out HTTP text body for %s' % url)
        fd.write(page_data)
    payload = {
        'status_code': resp.status_code,
        'domain': parsed.netloc,
        'path': parsed.path,
        'url': url,
        'time': str(now),
        'sha2': sha2,
        'md5': hashlib.md5(page_data.encode()).hexdigest(),
        'ssdeep': ssdeep.hash(page_data),
        'has_forms': len(get_forms(page_data)) > 0
    }
    with open('%s.json' % full_path, 'a') as fd:
        _LOGGER.info('Writing out metadata for %s' % url)
        json.dump(payload, fd, ensure_ascii=False, indent=4)
    return payload
Example #27
0
    def build_sample(self, data, url=None):
        """Assemble a metadata record for a captured sample.

        :param data: raw sample bytes; falsy input yields None.
        :param url: optional source URL recorded on the sample.
        :return: dict with type, md5/sha1, optional ssdeep/imphash/url and
            base64-encoded payload, or None when the sample is empty or of
            unknown type.
        """
        if not data:
            return None

        p = dict()
        p['type'] = self.get_sample_type(data)
        if p['type'] is None:
            return None

        p['md5'] = hashlib.md5(data).hexdigest()
        p['sha1'] = hashlib.sha1(data).hexdigest()

        # Fuzzy hash only when the ssdeep binding is available.
        if SSDEEP:
            p['ssdeep'] = ssdeep.hash(data)

        if p['type'] in ('PE', ):
            imphash = self.get_imphash(data)
            if imphash:
                p['imphash'] = imphash

        if url:
            p['url'] = url

        p['data'] = base64.b64encode(data)

        return p
def getSsdeep(data):
    """Return the ssdeep fuzzy hash of *data*, or '' on any failure.

    Failures are logged with traceback and swallowed (best-effort).
    Fixed the Python-2-only ``except Exception, e:`` syntax to the
    ``as e`` form, which is valid on both Python 2.6+ and 3.
    """
    try:
        res = ssdeep.hash(data)
        return res
    except Exception as e:
        logging.exception(str(e))
        return ''
Example #29
0
    def analyze_info_data(self):
        """Compare the PyPI metadata of ``self.pkg1`` and ``self.pkg2`` and
        record typosquatting indicators in ``self.flags``:

        - similar_description: ssdeep similarity of summaries > 80
        - same_homepage / same_docs: exact URL matches
        - has_subreleases: pkg2's release versions are a subset of pkg1's
        """
        sum1 = self.pkg1.package['info']['summary'] or ''
        sum2 = self.pkg2.package['info']['summary'] or ''
        # Fuzzy-match the summaries; > 80 ssdeep score counts as "similar".
        self.flags['similar_description'] = (ssdeep.compare(ssdeep.hash(sum1), ssdeep.hash(sum2)) > 80)

        page1 = self.pkg1.package['info']['home_page'] or ''
        page2 = self.pkg2.package['info']['home_page'] or ''
        self.flags['same_homepage'] = (page1 == page2)

        docs1 = self.pkg1.package['info']['docs_url'] or ''
        docs2 = self.pkg2.package['info']['docs_url'] or ''
        self.flags['same_docs'] = (docs1 == docs2)

        releases1 = set(self.pkg1.package['releases'].keys())
        releases2 = set(self.pkg2.package['releases'].keys())
        self.flags['has_subreleases'] = (releases2.issubset(releases1))
Example #30
0
    def build_sample(self, data, url = None, sampletype = None):
        """Assemble a metadata record for a captured sample.

        :param data: raw sample bytes; falsy input yields None.
        :param url: optional source URL recorded on the sample.
        :param sampletype: optional explicit type; when omitted the type is
            detected from the data.
        :return: dict with type, md5/sha1/sha256, optional
            ssdeep/imphash/url and base64-encoded payload, or None when the
            sample is empty or of unknown type.
        """
        if not data:
            return None

        p = dict()

        if sampletype:
            p['type'] = sampletype
        else:
            p['type'] = self.get_sample_type(data)

        if p['type'] is None:
            return None

        p['md5']    = hashlib.md5(data).hexdigest()
        p['sha1']   = hashlib.sha1(data).hexdigest()
        p['sha256'] = hashlib.sha256(data).hexdigest()

        # Fuzzy hash only when the ssdeep binding is available.
        if SSDEEP:
            p['ssdeep'] = ssdeep.hash(data)

        if p['type'] in ('PE', ):
            imphash = self.get_imphash(data)
            if imphash:
                p['imphash'] = imphash

        if url:
            p['url'] = url

        p['data'] = base64.b64encode(data)

        return p
Example #31
0
    def getfuzzyhash(self):
        """
    Returns fuzzy hash of spam using the ssdeep library.
    Hash is generated using the combination of mail's body + subject.
    Msg length is being checked because SSDEEP has some issues with comparing hashes
    of small spams. If spam's body is very small or non-existent, we add our randomText to the body.
    There are certain cases when the email body is empty. Hence len = html/text + subject.
    Side effect: records body+subject length in self.mailFields['len'].
    """
        if self.mailFields['html']:
            # Short HTML bodies get padded — ssdeep is unreliable on tiny
            # inputs.
            if len(self.mailFields['html']) < 150:
                data = self.mailFields['html'] + " " + self.mailFields[
                    'subject'] + self.randomText
            else:
                data = self.mailFields['html'] + " " + self.mailFields[
                    'subject']
            self.mailFields['len'] = len(self.mailFields['html']) + len(
                self.mailFields['subject'])

        elif self.mailFields['text']:
            if len(self.mailFields['text']) < 150:
                data = self.mailFields['text'] + " " + self.mailFields[
                    'subject'] + self.randomText
            else:
                data = self.mailFields['text'] + " " + self.mailFields[
                    'subject']
            self.mailFields['len'] = len(self.mailFields['text']) + len(
                self.mailFields['subject'])
        else:
            # Test mails without body and limited chars in subject
            data = self.mailFields['subject'] + self.mailFields[
                'from'] + self.randomText
            self.mailFields['len'] = len(self.mailFields['subject'])

        return ssdeep.hash(data)
Example #32
0
    def fingerprints(data):
        """This function return the fingerprints of data.

        Args:
            data (string): raw data to hash (bytes expected by hashlib —
                TODO confirm callers pass bytes on Python 3)

        Returns:
            tuple: fingerprints md5, sha1, sha256, sha512, ssdeep
        """

        # md5
        md5 = hashlib.md5()
        md5.update(data)
        md5 = md5.hexdigest()

        # sha1
        sha1 = hashlib.sha1()
        sha1.update(data)
        sha1 = sha1.hexdigest()

        # sha256
        sha256 = hashlib.sha256()
        sha256.update(data)
        sha256 = sha256.hexdigest()

        # sha512
        sha512 = hashlib.sha512()
        sha512.update(data)
        sha512 = sha512.hexdigest()

        # ssdeep (fuzzy hash, for similarity matching)
        ssdeep_ = ssdeep.hash(data)

        return md5, sha1, sha256, sha512, ssdeep_
Example #33
0
    def run_detect(self) -> t.Dict[str, str]:
        """Read ``self.filepath`` and gather initial metadata.

        :return: dict of MD5, SHA256 and ssdeep hashes of the file contents.
            When the VT_API environment variable is set, the file is also
            submitted to VirusTotal and the JSON response printed.
        """

        with open(self.filepath, "rb") as fd:
            data = fd.read()

        # generate table of hashes useful for analyst
        hashes: t.Dict[str, str] = {}
        hashes["MD5"] = hashlib.md5(data).hexdigest()
        hashes["SHA256"] = hashlib.sha256(data).hexdigest()
        hashes["Similiarity Hash (ssdeep)"] = ssdeep.hash(data)

        # VT checks are optional, and only occur if $VT_API is set
        vt_api: t.Optional[str] = os.environ.get("VT_API")
        if vt_api:
            # NOTE(review): VT v2 documents the key parameter as "apikey" in
            # the POST data — confirm "apiKey" query param actually works.
            params = {"apiKey": vt_api}
            # Upload the file contents just read (the original referenced an
            # undefined name `binary` here, raising NameError).
            files = {"file": data}
            resp = requests.post(
                "https://www.virustotal.com/vtapi/v2/file/scan",
                files=files,
                params=params,
            )
            print(resp.json())

        return hashes
Example #34
0
def ui_loop(repo, log_buf, state, is_gist=False):
    """Read one command for the current search result and act on it.

    Several commands recurse so the same result is presented again;
    the remaining commands fall through so the caller advances.
    """
    choice = input_handler(state, is_gist)

    if choice == "c":
        # Record this content's fuzzy hash so similar results get skipped.
        state.bad_signatures.append(ssdeep.hash(repo.decoded_content))
    elif choice == "u":
        owner = repo.owner if is_gist else repo.repository.owner
        state.bad_users.append(owner.login)
    elif choice == "r" and not is_gist:
        state.bad_repos.append(repo.repository.name)
    elif choice == "f" and not is_gist:
        state.bad_files.append(repo.name)
    elif choice == "p":
        print_handler(repo.decoded_content)
        ui_loop(repo, log_buf, state, is_gist)
    elif choice == "s":
        save_state(state.query, state)
        ui_loop(repo, log_buf, state, is_gist)
    elif choice == "a":
        with open(state.logfile, "a") as fd:
            fd.write(log_buf)
    elif choice.startswith("/"):
        # Regex query: append its output to the log buffer and re-prompt.
        ui_loop(repo, log_buf + regex_handler(choice, repo), state, is_gist)
    elif choice == "b":
        if state.index - 1 >= state.lastInitIndex:
            state.index -= 2
        else:
            print(bcolors.FAIL + "Can't go backwards past restore point "
                  "because of rate-limiting/API limitations" + bcolors.ENDC)
            ui_loop(repo, log_buf, state, is_gist)
    elif choice == "q":
        sys.exit(0)
Example #35
0
def should_parse(repo, state, is_gist=False):
    """Decide whether a search result should be presented to the user.

    Applies the ignore lists (users, repos, files) and then compares the
    content's fuzzy hash against previously rejected signatures.  Returns
    True only when no check rejects the result.
    """
    owner_login = repo.owner.login if is_gist else repo.repository.owner.login
    if owner_login in state.bad_users:
        print(bcolors.FAIL + "Failed check: Ignore User" + bcolors.ENDC)
        return False
    if not is_gist:
        if repo.repository.name in state.bad_repos:
            print(bcolors.FAIL + "Failed check: Ignore Repo" + bcolors.ENDC)
            return False
        if repo.name in state.bad_files:
            print(bcolors.FAIL + "Failed check: Ignore File" + bcolors.ENDC)
            return False

    # Fuzzy Hash Comparison
    try:
        if not is_gist:
            # Temporary fix for PyGithub until fixed upstream (PyGithub#1178)
            quoted_path = urllib.parse.quote(repo._path.value)
            repo._url.value = repo._url.value.replace(repo._path.value,
                                                      quoted_path)

        candidate_sig = ssdeep.hash(repo.decoded_content)
        for known_sig in state.bad_signatures:
            similarity = ssdeep.compare(candidate_sig, known_sig)
            if similarity > SIMILARITY_THRESHOLD:
                print(bcolors.FAIL +
                      "Failed check: Ignore Fuzzy Signature on Contents "
                      "({}% Similarity)".format(similarity) + bcolors.ENDC)
                return False
    except github.UnknownObjectException:
        print(bcolors.FAIL + "API Error: File no longer exists on github.com" +
              bcolors.ENDC)
        return False
    return True
Example #36
0
def getfuzzyhash():
    """Return the ssdeep fuzzy hash of the current spam.

    The hash covers the mail body (html preferred, then text) plus the
    subject.  SSDEEP compares very short inputs poorly, so when the body is
    under 150 chars — or missing entirely — randomText is appended as
    padding.  mailFields['len'] is set to body length + subject length
    (subject only when there is no body); downstream (shivamaindb.py) uses
    a looser comparison ratio when len < 10.
    """
    body_key = None
    if mailFields['html']:
        body_key = 'html'
    elif mailFields['text']:
        body_key = 'text'

    if body_key is None:
        # Test mails without body and limited chars in subject
        data = mailFields['subject'] + mailFields['from'] + randomText
        mailFields['len'] = len(mailFields['subject'])
    else:
        body = mailFields[body_key]
        data = body + " " + mailFields['subject']
        if len(body) < 150:
            # Pad short bodies so ssdeep has enough context.
            data += randomText
        mailFields['len'] = len(body) + len(mailFields['subject'])

    return ssdeep.hash(data)
Example #37
0
    def compute_hashes(self):
        """Compute the file hashes and metadata for this sample.

        Fills in any of sha256, sha1, md5, ssdeep, mime type and entropy
        that are still None.  Returns False (after flashing an error) when
        the underlying file is missing or unreadable.
        """
        filename = self.get_file_path(self.sha256)

        # Make sure the file exists and is readable
        if not os.access(filename, os.R_OK):
            flash('There was an error while trying to analyse the file.', 'danger')
            return False

        with open(filename, 'rb') as f:
            buf = f.read()

        if self.sha256 is None:
            self.sha256 = hashlib.sha256(buf).hexdigest()
        if self.sha1 is None:
            self.sha1 = hashlib.sha1(buf).hexdigest()
        if self.md5 is None:
            self.md5 = hashlib.md5(buf).hexdigest()
        if self.ssdeep is None:
            self.ssdeep = ssdeep.hash(buf)
        if self.mime is None:
            # Best-effort mime detection.  Narrowed from a bare `except:`
            # so KeyboardInterrupt/SystemExit are no longer swallowed.
            try:
                self.mime = magic.from_buffer(buf, mime=True).decode('utf-8')
            except Exception:
                self.mime = None
        if self.entropy is None:
            self.entropy = self.compute_entropy(buf)
Example #38
0
    def build_sample(self, data, url=None, sampletype=None):
        """Package raw sample bytes into a metadata dict.

        Returns None when there is no data or the sample type cannot be
        determined; otherwise a dict with type, digests, optional ssdeep
        hash, optional imphash (PE only), optional source url, and the
        payload base64-encoded under 'data'.
        """
        if not data:
            return None

        sample = dict()

        if sampletype:
            sample['type'] = sampletype
            if isinstance(data, str):
                data = data.encode()
        else:
            sample['type'] = self.get_sample_type(data)

        if sample['type'] is None:
            return None

        # Standard digests, inserted in md5/sha1/sha256 order.
        for key, algo in (('md5', hashlib.md5),
                          ('sha1', hashlib.sha1),
                          ('sha256', hashlib.sha256)):
            sample[key] = algo(data).hexdigest()

        if SSDEEP:
            sample['ssdeep'] = ssdeep.hash(data)

        if sample['type'] in ('PE', ):
            imphash = self.get_imphash(data)
            if imphash:
                sample['imphash'] = imphash

        if url:
            sample['url'] = url

        sample['data'] = base64.b64encode(data).decode()
        return sample
Example #39
0
    def get_hashes(self, address_code):
        """Given array of contract bytecodes returns array of corresponding ssdeep (CTPH) hashes

        keyword arguments:
        address_code -- Array of bytecodes as strings"""
        # Materialise the result: on Python 3 `map` yields a single-use
        # iterator, while the documented contract is an array of hashes.
        return [ssdeep.hash(bytecode) for bytecode in address_code]
Example #40
0
def hash_data(file):
    """Return a dict of digests (md5/sha1/sha256/sha512/ssdeep) of file.content."""
    content = file.content
    return {
        'evil_finder_md5': hashlib.md5(content).hexdigest(),
        'evil_finder_sha128': hashlib.sha1(content).hexdigest(),
        'evil_finder_sha256': hashlib.sha256(content).hexdigest(),
        'evil_finder_sha512': hashlib.sha512(content).hexdigest(),
        'evil_finder_ssdeep': ssdeep.hash(content),
    }
Example #41
0
 def execute(self, input_data):
     """Hash the raw sample bytes and fold the results into self.meta."""
     raw = input_data["sample"]["raw_bytes"]
     digests = {
         "sha1": hashlib.sha1(raw).hexdigest(),
         "sha256": hashlib.sha256(raw).hexdigest(),
         "ssdeep": ssd.hash(raw),
         "entropy": self._entropy(raw),
     }
     self.meta.update(digests)
     self.meta.update(input_data["meta"])
     return self.meta
Example #42
0
 def execute(self, input_data):
     """Compute digests of the sample's raw bytes and return merged metadata."""
     payload = input_data['sample']['raw_bytes']
     meta = self.meta
     meta['sha1'] = hashlib.sha1(payload).hexdigest()
     meta['sha256'] = hashlib.sha256(payload).hexdigest()
     meta['ssdeep'] = ssd.hash(payload)
     meta['entropy'] = self._entropy(payload)
     # Caller-supplied metadata wins on key collisions.
     meta.update(input_data['meta'])
     return meta
def main():
    """Entry function.

    Reads a CSV of project acronyms/urls, prepares per-url fuzzy-hash and
    match-score columns, then downloads each projectUrl and stores its
    ssdeep hash.  Written for Python 2 (print statement, urllib2).
    """
    parser = argparse.ArgumentParser(
        description='Process Fuzzy hashing comparison between project url and \
        return project urls')

    parser.add_argument("input_csv_file", help="Specify the csv file to read")
    parser.add_argument(
        "number_urls", help="Number of found urls to process fuzzy hashing \
        (max = 10)")

    args = parser.parse_args()

    input_csv_file = args.input_csv_file
    # NOTE(review): number_urls is parsed but not used in this visible
    # portion of the function — presumably consumed further down.
    number_urls = int(args.number_urls)

    # Expected CSV layout: acronym, title, the project's own url, then up
    # to ten candidate urls produced by an earlier search step.
    header_names = ['acronym', 'title', 'projectUrl', 'foundProjectUrl1',
                    'foundProjectUrl2', 'foundProjectUrl3',
                    'foundProjectUrl4', 'foundProjectUrl5',
                    'foundProjectUrl6', 'foundProjectUrl7',
                    'foundProjectUrl8', 'foundProjectUrl9',
                    'foundProjectUrl10']
    df = pd.read_csv(input_csv_file, sep=',', quotechar='"',
                     names=header_names, index_col=False)

    # Pre-create the output columns so assignments below are by label.
    df['projectUrlHash'] = np.nan
    df['foundProjectUrl1Hash'] = np.nan
    df['foundProjectUrl2Hash'] = np.nan
    df['foundProjectUrl3Hash'] = np.nan
    df['foundProjectUrl4Hash'] = np.nan
    df['foundProjectUrl5Hash'] = np.nan
    df['foundProjectUrl6Hash'] = np.nan
    df['foundProjectUrl7Hash'] = np.nan
    df['foundProjectUrl8Hash'] = np.nan
    df['foundProjectUrl9Hash'] = np.nan
    df['foundProjectUrl10Hash'] = np.nan
    df['MatchScore1'] = np.nan
    df['MatchScore2'] = np.nan
    df['MatchScore3'] = np.nan
    df['MatchScore4'] = np.nan
    df['MatchScore5'] = np.nan
    df['MatchScore6'] = np.nan
    df['MatchScore7'] = np.nan
    df['MatchScore8'] = np.nan
    df['MatchScore9'] = np.nan
    df['MatchScore10'] = np.nan

    for index, row in df.iterrows():
        print "computing fuzzy hash for project %s" % row['acronym']
        # Download the page and fuzzy-hash its raw bytes; HTTP/URL errors
        # are reported and leave the cell as NaN.
        # NOTE(review): DataFrame.ix is deprecated/removed in modern pandas.
        try:
            df.ix[index, 'projectUrlHash'] = ssdeep.hash(
                urllib2.urlopen(row['projectUrl'], timeout=10).read())
        except urllib2.HTTPError, e:
            print e.code
        except urllib2.URLError, e:
            print e.reason
Example #44
0
    def _run(self, scanObject, result, depth, args):
        '''
        Assumes:
            there is a string like object in scanObject.buffer
        Ensures:
            hash values added using scanObject.addMetadata

        Laika Config File Options:
            hashmd5:    "1" = md5.hexdigest,    "0" = omit
            hashSHA1:   "1" = sha1.hexdigest,   "0" = omit
            hashSHA256: "1" = sha256.hexdigest, "0" = omit
            hashSHA512: "1" = sha512.hexdigest, "0" = omit
            hashssdeep: "1" = ssdeep.hash,      "0" = omit

        Function Arguments:
        :param scanObject:<laikaboss.objectmodel.ScanObject>
        :param result:<laikaboss.objectmodel.ScanResult>
        :param depth:<int>
        :param args:<dict> --execution flow controls--
                    Valid args names <str> (value must be 1, 0, "1", or "0")
                        1/"1": Generate the hash of named type
                        0/"0": Omit the hash of named type
                        default args:
                        {"md5":1,
                         "SHA1":0,
                         "SHA256":1,
                         "SHA512":1,
                         "ssdeep":0}

        :return: Always returns a empty list (no child objects)
        '''
        moduleResult = []
        metaDict = {}
        # Each digest is gated by its per-run arg / config option, falling
        # back to this module's defaults.
        if int(get_option(args, 'md5', 'hashmd5', "md5" in self.module_defaults)):
            metaDict['md5'] = hashlib.md5(scanObject.buffer).hexdigest()
        if int(get_option(args, 'SHA1', 'hashSHA1', "SHA1" in self.module_defaults)):
            metaDict['SHA1'] = hashlib.sha1(scanObject.buffer).hexdigest()
        if int(get_option(args, 'SHA256', 'hashSHA256', "SHA256" in self.module_defaults)):
            metaDict['SHA256'] = hashlib.sha256(scanObject.buffer).hexdigest()
        if int(get_option(args, 'SHA512', 'hashSHA512', "SHA512" in self.module_defaults)):
            metaDict['SHA512'] = hashlib.sha512(scanObject.buffer).hexdigest()
        if int(get_option(args, 'ssdeep', 'hashssdeep', "ssdeep" in self.module_defaults)):
            # Only import ssdeep if dispatched: prevents an ImportError for
            # installs that don't have/want the optional package, and costs
            # nothing when the option is off.
            try:
                import ssdeep
                metaDict['ssdeep'] = ssdeep.hash(scanObject.buffer)
            except ImportError:
                metaDict['ssdeep'] = "" #indicate ssdeep was configured but failed


        scanObject.addMetadata(self.module_name, "HASHES", metaDict)

        return moduleResult
Example #45
0
File: pep.py Project: alrawi/pype
def processMeta(pe, fc, profile):
    """Populate the profile's static section with basic PE metadata and hashes."""
    static = profile[PROFILE.STATIC]
    static[META.fileSize] = len(fc)
    static[META.timeStamp] = pe.FILE_HEADER.TimeDateStamp
    static[META.dll] = pe.FILE_HEADER.IMAGE_FILE_DLL
    static[META.numberSec] = pe.FILE_HEADER.NumberOfSections
    static[META.importHash] = pe.get_imphash()
    static[META.md5] = hashlib.md5(fc).hexdigest()
    static[META.sha1] = hashlib.sha1(fc).hexdigest()
    static[META.ssdeep] = ssdeep.hash(fc)
    return profile
Example #46
0
 def _run(self, scanObject, result, depth, args):
     """Attach SHA256, truncated SHA512 and ssdeep digests of the buffer.

     Always returns an empty list: this module only emits metadata and
     never spawns child objects.
     """
     metadata = {
         'SHA256': hashlib.sha256(scanObject.buffer).hexdigest(),
         # Only the first 32 hex chars of the SHA512 digest are kept.
         'SHA512': hashlib.sha512(scanObject.buffer).hexdigest()[:32],
         'ssdeep': ssdeep.hash(scanObject.buffer),
     }
     scanObject.addMetadata(self.module_name, "HASHES", metadata)
     return []
Example #47
0
def META_BASIC_INFO(s, buff):
   """Return a dict of standard digests, the ssdeep hash, and size of buff."""
   info = {}
   info['MD5'] = hashlib.md5(buff).hexdigest()
   info['SHA1'] = hashlib.sha1(buff).hexdigest()
   info['SHA256'] = hashlib.sha256(buff).hexdigest()
   info['SHA512'] = hashlib.sha512(buff).hexdigest()
   info['ssdeep'] = ssdeep.hash(buff)
   info['Size'] = '%s bytes' % len(buff)
   return info
Example #48
0
def META_BASIC_INFO(s, buff):
   """Return an OrderedDict of digests, the ssdeep hash, and size of buff."""
   info = OrderedDict()
   info['MD5'] = hashlib.md5(buff).hexdigest()
   info['SHA1'] = hashlib.sha1(buff).hexdigest()
   info['SHA256'] = hashlib.sha256(buff).hexdigest()
   info['SHA512'] = hashlib.sha512(buff).hexdigest()
   info['ssdeep'] = ssdeep.hash(buff)
   info['Size'] = '%s bytes' % len(buff)
   return info
Example #49
0
def get_hash_tuple(functions, filename):
    """Create the binary tuple used by Malfunction and Mallearn.

    Result shape: (binary hash of the file, [ssdeep hash per function]).
    """
    binary_hash = get_binary_hash(filename)
    function_hashes = [ssdeep.hash(func) for func in functions]
    return (binary_hash, function_hashes)
Example #50
0
 def execute(self, input_data):
     # Hash the raw sample bytes and return the digests merged with the
     # incoming metadata.  (Python 2: dict.iteritems.)
     raw_bytes = input_data['sample']['raw_bytes']
     sha1 = hashlib.sha1(raw_bytes).hexdigest()
     sha256 = hashlib.sha256(raw_bytes).hexdigest()
     ssdeep = ssd.hash(raw_bytes)
     entropy = self._entropy(raw_bytes)
     # The output keys are literally the local variable names above
     # (filtered through locals()) — renaming any local changes the
     # emitted keys, so do not "clean up" these names.
     output = {name:value for name,value in locals().iteritems()
             if name not in ['self', 'input_data','raw_bytes']}
     output.update(input_data['meta'])
     return output
Example #51
0
    def build_apk_sample(self, data, url=None):
        """Build the metadata dict for an APK sample.

        NOTE(review): `url` is accepted but unused here — presumably kept
        for signature parity with other sample builders; confirm before
        removing.
        """
        sample = {}
        sample["md5"] = hashlib.md5(data).hexdigest()
        sample["sha1"] = hashlib.sha1(data).hexdigest()
        sample["raw"] = data
        sample["data"] = base64.b64encode(data)
        sample["type"] = "APK"

        if SSDEEP:
            sample['ssdeep'] = ssdeep.hash(data)

        return sample
Example #52
0
def set_hash(cid):
    """Compute and persist content hashes for the Content row with id cid.

    Hashes the UTF-8 encoded content with sha1/sha256/sha512/ssdeep and
    saves the row; failures are logged and the (unsaved) object is still
    returned.
    """
    content_obj = Content.objects.get(id=cid)
    if content_obj and content_obj.content:
        try:
            encoded = content_obj.content.encode("utf-8")
            content_obj.sha1 = str(hashlib.sha1(encoded).hexdigest())
            content_obj.sha256 = str(hashlib.sha256(encoded).hexdigest())
            content_obj.sha512 = str(hashlib.sha512(encoded).hexdigest())
            content_obj.ssdeep = str(ssdeep.hash(encoded))
            content_obj.save()
        except Exception as e:
            logger.error(str(e))
    return content_obj
Example #53
0
    def get_hashes(self):
        """Compute the digest table for the file at self.path.

        Returns:
            dict: md5/sha1/sha256/sha512 hex digests, crc32 as 8-char
            upper-case hex, and the ssdeep fuzzy hash.
        """
        # Open in binary mode: hashing must see the raw bytes — text mode
        # breaks on Python 3 (str vs bytes) and newline-translates on
        # Windows, changing every digest.
        with open(self.path, 'rb') as handle:
            filedata = handle.read()
            hashes = {
                'md5': hashlib.md5(filedata).hexdigest(),
                'sha1': hashlib.sha1(filedata).hexdigest(),
                'sha256': hashlib.sha256(filedata).hexdigest(),
                'sha512': hashlib.sha512(filedata).hexdigest(),
                'crc32': "%08X" % (binascii.crc32(filedata) & 0xFFFFFFFF),
                'ssdeep': ssdeep.hash(filedata),
            }
        return hashes
Example #54
0
    def get_hashes(self):
        """Compute the digest table for the file at self.path.

        Returns:
            dict: md5/sha1/sha256/sha512 hex digests, crc32 as 8-char
            upper-case hex, and the ssdeep fuzzy hash.
        """
        # Open in binary mode: hashing must see the raw bytes — text mode
        # breaks on Python 3 (str vs bytes) and newline-translates on
        # Windows, changing every digest.
        with open(self.path, "rb") as handle:
            filedata = handle.read()
            hashes = {
                "md5": hashlib.md5(filedata).hexdigest(),
                "sha1": hashlib.sha1(filedata).hexdigest(),
                "sha256": hashlib.sha256(filedata).hexdigest(),
                "sha512": hashlib.sha512(filedata).hexdigest(),
                "crc32": "%08X" % (binascii.crc32(filedata) & 0xFFFFFFFF),
                "ssdeep": ssdeep.hash(filedata),
            }
        return hashes
Example #55
0
def get_ssdeep(payload):
    """
    Generate ssdeep hash of a payload

    :param payload: The payload to be hashed.

    :returns: ssdeep hash
    :rtype: str or None

    """
    try:
        fuzzy = ssdeep.hash(payload)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any hashing failure yields None.
        fuzzy = None

    return fuzzy
Example #56
0
    def get_page_data(response):
        """Build a WebPage item for a crawled response.

        Records url and status code; when a screenshot was taken, also
        stores the screenshot path plus ssdeep hashes of the page source
        and (best-effort) of the screenshot file.
        """
        page = WebPage()
        page['uri'] = response.url
        page['status_code'] = response.status

        if 'screenshot' in response.meta:
            page['screenshot'] = response.meta['screenshot']
            page['ssdeep_pagesource'] = str(ssdeep.hash(response.body))

            # Best-effort: the screenshot file may be missing or unreadable.
            # Narrowed from a bare `except:` so system-exiting exceptions
            # are no longer swallowed.
            try:
                screenshot_hash = ssdeep.hash_from_file(response.meta['screenshot'])
                page['ssdeep_screenshot'] = screenshot_hash
            except Exception:
                log.msg("Could not create hash from screenshot: " + response.meta['screenshot'], level=log.DEBUG)

        return page
Example #57
0
	def fileHash(self, filePath):
		"""Return (md5, ssdeep) digests of the file at filePath.

		Either value is the string "Null" when it cannot be computed
		(unreadable file, or the optional ssdeep package is missing).
		"""
		try:
			with open(filePath, 'rb') as fh:
				data = fh.read()
			md5 = hashlib.md5(data).hexdigest()
			try:
				# ssdeep is optional; fall back to "Null" without it.
				import ssdeep
				deep = ssdeep.hash(data)
			except Exception:
				deep = "Null"
		except Exception:
			# Narrowed from a bare `except:` so system-exiting exceptions
			# propagate; unreadable files yield "Null" digests.
			md5 = "Null"
			deep = "Null"
		return md5, deep
Example #58
0
    def get_page_info(self):
        """Build a WebPage item for self.response.

        Records url, status code, the User-Agent/Referer request metadata,
        and — when a screenshot was taken — its path plus ssdeep hashes of
        the page source and (best-effort) of the screenshot file.
        """
        page = WebPage()
        page['uri'] = self.response.url
        page['status_code'] = self.response.status
        page['useragent'] = self.response.meta.get('User-Agent')
        page['referer'] = self.response.meta.get('Referer')

        if 'screenshot' in self.response.meta:
            page['screenshot'] = self.response.meta['screenshot']
            page['ssdeep_pagesource'] = str(ssdeep.hash(self.response.body))

            # Best-effort: the screenshot file may be missing or unreadable.
            # Narrowed from a bare `except:` so system-exiting exceptions
            # are no longer swallowed.
            try:
                screenshot_hash = ssdeep.hash_from_file(self.response.meta['screenshot'])
                page['ssdeep_screenshot'] = screenshot_hash
            except Exception:
                log.msg("Could not create hash from screenshot: " + self.response.meta['screenshot'], level=log.DEBUG)
        return page
Example #59
0
def load_data_from_results_file(path):
    """Load newline-delimited JSON results into a nested dict.

    Returns {base_url: {offset: record}}, where each record gains a
    hex-decoded 'content', its ssdeep hash, and an 'offset' (the url with
    the base_url prefix stripped).  Python 2 only: str.decode('hex') and
    the print statement.
    """
    c2_bases = {}

    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            data = json.loads(line)
            # 'content' is stored hex-encoded on disk.
            data['content'] = data['content'].decode('hex')
            data['content_ssdeep'] = ssdeep.hash(data['content'])

            if data['base_url'] not in c2_bases:
                c2_bases[data['base_url']] = {}
            # Path portion of the url after the base_url prefix.
            data["offset"] = data["url"][len(data["base_url"]):]
            print "{0}  -  {1}  -  {2}".format(data['code'], data['base_url'], data['offset'])
            c2_bases[data['base_url']][data['offset']] = data

    return c2_bases
Example #60
0
	def fileHash(self, filePath, mdHash, deepHash):
		"""Return (md5, ssdeep) of filePath, gated by the mdHash/deepHash flags.

		Each value is the string "Null" when its flag is off or the file
		could not be read.  (Python 2: print statement.)
		"""
		try:
			with open(filePath, 'rb') as fh:
				data = fh.read()
		except:
			print "unable to open file %s" % filePath
			data = None
		if data != None and mdHash == True:
			m = hashlib.md5()
			m.update(data)
			md5 = m.hexdigest()
		else:
			md5 = "Null"
		if data != None and deepHash == True:
			# ssdeep import deferred until actually requested; note an
			# ImportError here propagates (unlike the md5 path).
			import ssdeep
			deep = ssdeep.hash(data)
		else:
			deep = "Null"
		return md5, deep