def peid(self): def get_signatures(): with file(os.path.join(CIRTKIT_ROOT, 'data/peid/UserDB.TXT'), 'rt') as f: sig_data = f.read() signatures = peutils.SignatureDatabase(data=sig_data) return signatures def get_matches(pe, signatures): matches = signatures.match_all(pe, ep_only=True) return matches if not self.__check_session(): return signatures = get_signatures() peid_matches = get_matches(self.pe, signatures) if peid_matches: self.log('info', "PEiD Signatures:") for sig in peid_matches: if type(sig) is list: self.log('item', sig[0]) else: self.log('item', sig) else: self.log('info', "No PEiD signatures matched.") if self.args.scan and peid_matches: self.log('info', "Scanning the repository for matching samples...") db = Database() samples = db.find(key='all') matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_pe = pefile.PE(sample_path) cur_peid_matches = get_matches(cur_pe, signatures) except: continue if peid_matches == cur_peid_matches: matches.append([sample.name, sample.sha256]) self.log('info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log('table', dict(header=['Name', 'SHA256'], rows=matches))
def pehash(self): if not HAVE_PEHASH: self.log('error', "PEhash is missing. Please copy PEhash to the modules directory of Viper") return current_pehash = None if __sessions__.is_set(): current_pehash = calculate_pehash(__sessions__.current.file.path) self.log('info', "PEhash: {0}".format(bold(current_pehash))) if self.args.all or self.args.cluster or self.args.scan: db = Database() samples = db.find(key='all') rows = [] for sample in samples: sample_path = get_sample_path(sample.sha256) pe_hash = calculate_pehash(sample_path) if pe_hash: rows.append((sample.name, sample.md5, pe_hash)) if self.args.all: self.log('info', "PEhash for all files:") header = ['Name', 'MD5', 'PEhash'] self.log('table', dict(header=header, rows=rows)) elif self.args.cluster: self.log('info', "Clustering files by PEhash...") cluster = {} for sample_name, sample_md5, pe_hash in rows: cluster.setdefault(pe_hash, []).append([sample_name, sample_md5]) for item in cluster.items(): if len(item[1]) > 1: self.log('info', "PEhash cluster {0}:".format(bold(item[0]))) self.log('table', dict(header=['Name', 'MD5'], rows=item[1])) elif self.args.scan: if __sessions__.is_set() and current_pehash: self.log('info', "Finding matching samples...") matches = [] for row in rows: if row[1] == __sessions__.current.file.md5: continue if row[2] == current_pehash: matches.append([row[0], row[1]]) if matches: self.log('table', dict(header=['Name', 'MD5'], rows=matches)) else: self.log('info', "No matches found")
def compiletime(self): def get_compiletime(pe): return datetime.datetime.fromtimestamp( pe.FILE_HEADER.TimeDateStamp) if not self.__check_session(): return compile_time = get_compiletime(self.pe) self.log('info', "Compile Time: {0}".format(bold(compile_time))) if self.args.scan: self.log('info', "Scanning the repository for matching samples...") db = Database() samples = db.find(key='all') matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_pe = pefile.PE(sample_path) cur_compile_time = get_compiletime(cur_pe) except: continue if compile_time == cur_compile_time: matches.append([sample.name, sample.md5, cur_compile_time]) else: if self.args.window: if cur_compile_time > compile_time: delta = (cur_compile_time - compile_time) elif cur_compile_time < compile_time: delta = (compile_time - cur_compile_time) delta_minutes = int(delta.total_seconds()) / 60 if delta_minutes <= self.args.window: matches.append( [sample.name, sample.md5, cur_compile_time]) self.log('info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log( 'table', dict(header=['Name', 'MD5', 'Compile Time'], rows=matches))
def cmd_tags(self, *args): parser = argparse.ArgumentParser(prog='tags', description="Modify tags of the opened file") parser.add_argument('-a', '--add', metavar='TAG', help="Add tags to the opened file (comma separated)") parser.add_argument('-d', '--delete', metavar='TAG', help="Delete a tag from the opened file") try: args = parser.parse_args(args) except: return # This command requires a session to be opened. if not __sessions__.is_set(): self.log('error', "No session opened") parser.print_usage() return # If no arguments are specified, there's not much to do. # However, it could make sense to also retrieve a list of existing # tags from this command, and not just from the "find" command alone. if args.add is None and args.delete is None: parser.print_usage() return # TODO: handle situation where addition or deletion of a tag fail. db = Database() if not db.find(key='sha256', value=__sessions__.current.file.sha256): self.log('error', "The opened file is not stored in the database. " "If you want to add it use the `store` command.") return if args.add: # Add specified tags to the database's entry belonging to # the opened file. db.add_tags(__sessions__.current.file.sha256, args.add) self.log('info', "Tags added to the currently opened file") # We refresh the opened session to update the attributes. # Namely, the list of tags returned by the 'info' command # needs to be re-generated, or it wouldn't show the new tags # until the existing session is closed a new one is opened. self.log('info', "Refreshing session to update attributes...") __sessions__.new(__sessions__.current.file.path) if args.delete: # Delete the tag from the database. db.delete_tag(args.delete, __sessions__.current.file.sha256) # Refresh the session so that the attributes of the file are # updated. self.log('info', "Refreshing session to update attributes...") __sessions__.new(__sessions__.current.file.path)
def compiletime(self): def get_compiletime(pe): return datetime.datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp) if not self.__check_session(): return compile_time = get_compiletime(self.pe) self.log('info', "Compile Time: {0}".format(bold(compile_time))) if self.args.scan: self.log('info', "Scanning the repository for matching samples...") db = Database() samples = db.find(key='all') matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_pe = pefile.PE(sample_path) cur_compile_time = get_compiletime(cur_pe) except: continue if compile_time == cur_compile_time: matches.append([sample.name, sample.md5, cur_compile_time]) else: if self.args.window: if cur_compile_time > compile_time: delta = (cur_compile_time - compile_time) elif cur_compile_time < compile_time: delta = (compile_time - cur_compile_time) delta_minutes = int(delta.total_seconds()) / 60 if delta_minutes <= self.args.window: matches.append([sample.name, sample.md5, cur_compile_time]) self.log('info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log('table', dict(header=['Name', 'MD5', 'Compile Time'], rows=matches))
def edit(self): db = Database() samples = db.find(key='all') filenames = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue filenames.append(sample.name) # from http://hetland.org/coding/python/levenshtein.py def levenshtein(a, b): "Calculates the Levenshtein distance between a and b." n, m = len(a), len(b) if n > m: # Make sure n <= m, to use O(min(n,m)) space a, b = b, a n, m = m, n current = range(n + 1) for i in range(1, m + 1): previous, current = current, [i] + [0] * n for j in range(1, n + 1): add, delete = previous[j] + 1, current[j - 1] + 1 change = previous[j - 1] if a[j - 1] != b[i - 1]: change = change + 1 current[j] = min(add, delete, change) return current[n] distance = [] for i in itertools.combinations(filenames, 2): edit = levenshtein(i[0], i[1]) distance.append(edit) self.log( 'info', "Average Edit distance: {0}".format(sum(distance) / len(distance)))
def edit(self): db = Database() samples = db.find(key='all') filenames = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue filenames.append(sample.name) # from http://hetland.org/coding/python/levenshtein.py def levenshtein(a, b): "Calculates the Levenshtein distance between a and b." n, m = len(a), len(b) if n > m: # Make sure n <= m, to use O(min(n,m)) space a, b = b, a n, m = m, n current = range(n + 1) for i in range(1, m + 1): previous, current = current, [i] + [0] * n for j in range(1, n + 1): add, delete = previous[j] + 1, current[j - 1] + 1 change = previous[j - 1] if a[j - 1] != b[i - 1]: change = change + 1 current[j] = min(add, delete, change) return current[n] distance = [] for i in itertools.combinations(filenames, 2): edit = levenshtein(i[0], i[1]) distance.append(edit) self.log('info', "Average Edit distance: {0}".format(sum(distance) / len(distance)))
def get_signed_samples(current=None, cert_filter=None): db = Database() samples = db.find(key='all') results = [] for sample in samples: # Skip if it's the same file. if current: if sample.sha256 == current: continue # Obtain path to the binary. sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue # Open PE instance. try: cur_pe = pefile.PE(sample_path) except: continue cur_cert_data = get_certificate(cur_pe) if not cur_cert_data: continue cur_cert_md5 = get_md5(cur_cert_data) if cert_filter: if cur_cert_md5 == cert_filter: results.append([sample.name, sample.md5]) else: results.append([sample.name, sample.md5, cur_cert_md5]) return results
def run(self): super(Fuzzy, self).run() if not HAVE_PYDEEP: self.log('error', "Missing dependency, install pydeep (`pip install pydeep`)") return arg_verbose = False arg_cluster = False if self.args: if self.args.verbose: arg_verbose = self.args.verbose if self.args.cluster: arg_cluster = self.args.cluster db = Database() samples = db.find(key='all') # Check if we're operating in cluster mode, otherwise we run on the # currently opened file. if arg_cluster: self.log('info', "Generating clusters, this might take a while...") clusters = dict() for sample in samples: if not sample.ssdeep: continue if arg_verbose: self.log('info', "Testing file {0} with ssdeep {1}".format( sample.md5, sample.ssdeep)) clustered = False for cluster_name, cluster_members in clusters.items(): # Check if sample is already in the cluster. if sample.md5 in cluster_members: continue if arg_verbose: self.log('info', "Testing {0} in cluser {1}".format( sample.md5, cluster_name)) for member in cluster_members: if sample.md5 == member[0]: continue member_hash = member[0] member_name = member[1] member_ssdeep = db.find(key='md5', value=member_hash)[0].ssdeep if pydeep.compare(sample.ssdeep, member_ssdeep) > 40: if arg_verbose: self.log('info', "Found home for {0} in cluster {1}".format( sample.md5, cluster_name)) clusters[cluster_name].append([sample.md5, sample.name]) clustered = True break if not clustered: cluster_id = len(clusters) + 1 clusters[cluster_id] = [[sample.md5, sample.name],] ordered_clusters = collections.OrderedDict(sorted(clusters.items())) self.log('info', "Following are the identified clusters with more than one member") for cluster_name, cluster_members in ordered_clusters.items(): # We include in the results only clusters with more than just # one member. if len(cluster_members) <= 1: continue self.log('info', "Ssdeep cluster {0}".format(bold(cluster_name))) self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members)) # We're running against the already opened file. else: if not __sessions__.is_set(): self.log('error', "No session opened") return if not __sessions__.current.file.ssdeep: self.log('error', "No ssdeep hash available for opened file") return matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue if not sample.ssdeep: continue score = pydeep.compare(__sessions__.current.file.ssdeep, sample.ssdeep) if score > 40: matches.append(['{0}%'.format(score), sample.name, sample.sha256]) if arg_verbose: self.log('info', "Match {0}%: {2} [{1}]".format(score, sample.name, sample.sha256)) self.log('info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log('table', dict(header=['Score', 'Name', 'SHA256'], rows=matches))
def language(self): def get_iat(pe): iat = [] if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'): for peimport in pe.DIRECTORY_ENTRY_IMPORT: iat.append(peimport.dll) return iat def check_module(iat, match): for imp in iat: if imp.find(match) != -1: return True return False def is_cpp(data, cpp_count): for line in data: if 'type_info' in line or 'RTTI' in line: cpp_count += 1 break if cpp_count == 2: return True return False def is_delphi(data): for line in data: if 'Borland' in line: path = line.split('\\') for p in path: if 'Delphi' in p: return True return False def is_vbdotnet(data): for line in data: if 'Compiler' in line: stuff = line.split('.') if 'VisualBasic' in stuff: return True return False def is_autoit(data): for line in data: if 'AU3!' in line: return True return False def is_packed(pe): for section in pe.sections: if section.get_entropy() > 7: return True return False def get_strings(content): regexp = '[\x30-\x39\x41-\x5f\x61-\x7a\-\.:]{4,}' return re.findall(regexp, content) def find_language(iat, sample, content): dotnet = False cpp_count = 0 found = None # VB check if check_module(iat, 'VB'): self.log( 'info', "{0} - Possible language: Visual Basic".format( sample.name)) return True # .NET check if check_module(iat, 'mscoree.dll') and not found: dotnet = True found = '.NET' # C DLL check if not found and (check_module(iat, 'msvcr') or check_module( iat, 'MSVCR') or check_module(iat, 'c++')): cpp_count += 1 if not found: data = get_strings(content) if is_cpp(data, cpp_count) and not found: found = 'CPP' if not found and cpp_count == 1: found = 'C' if not dotnet and is_delphi(data) and not found: found = 'Delphi' if dotnet and is_vbdotnet(data): found = 'Visual Basic .NET' if is_autoit(data) and not found: found = 'AutoIt' return found if not self.__check_session(): return if is_packed(self.pe): self.log( 'warning', "Probably packed, the language guess might be unreliable") language = find_language(get_iat(self.pe), __sessions__.current.file, __sessions__.current.file.data) if language: self.log('info', "Probable language: {0}".format(bold(language))) else: self.log('error', "Programming language not identified") return if self.args.scan: self.log('info', "Scanning the repository for matching samples...") db = Database() samples = db.find(key='all') matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_pe = pefile.PE(sample_path) except pefile.PEFormatError as e: continue cur_packed = '' if is_packed(cur_pe): cur_packed = 'Yes' cur_language = find_language(get_iat(cur_pe), sample, open(sample_path, 'rb').read()) if not cur_language: continue if cur_language == language: matches.append([sample.name, sample.md5, cur_packed]) if matches: self.log( 'table', dict(header=['Name', 'MD5', 'Is Packed'], rows=matches)) else: self.log('info', "No matches found")
def imphash(self): if self.args.scan and self.args.cluster: self.log('error', "You selected two exclusive options, pick one") return if self.args.cluster: self.log('info', "Clustering all samples by imphash...") db = Database() samples = db.find(key='all') cluster = {} for sample in samples: sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_imphash = pefile.PE(sample_path).get_imphash() except: continue if cur_imphash not in cluster: cluster[cur_imphash] = [] cluster[cur_imphash].append([sample.sha256, sample.name]) for cluster_name, cluster_members in cluster.items(): # Skipping clusters with only one entry. if len(cluster_members) == 1: continue self.log('info', "Imphash cluster {0}".format(bold(cluster_name))) self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members)) return if self.__check_session(): try: imphash = self.pe.get_imphash() except AttributeError: self.log( 'error', "No imphash support, upgrade pefile to a version >= 1.2.10-139 (`pip install --upgrade pefile`)" ) return self.log('info', "Imphash: {0}".format(bold(imphash))) if self.args.scan: self.log('info', "Scanning the repository for matching samples...") db = Database() samples = db.find(key='all') matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_imphash = pefile.PE(sample_path).get_imphash() except: continue if imphash == cur_imphash: matches.append([sample.name, sample.sha256]) self.log( 'info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log('table', dict(header=['Name', 'SHA256'], rows=matches))
def resources(self): # Use this function to retrieve resources for the given PE instance. # Returns all the identified resources with indicators and attributes. def get_resources(pe): resources = [] if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'): count = 1 for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries: try: resource = {} if resource_type.name is not None: name = str(resource_type.name) else: name = str( pefile.RESOURCE_TYPE.get( resource_type.struct.Id)) if name is None: name = str(resource_type.struct.Id) if hasattr(resource_type, 'directory'): for resource_id in resource_type.directory.entries: if hasattr(resource_id, 'directory'): for resource_lang in resource_id.directory.entries: data = pe.get_data( resource_lang.data.struct. OffsetToData, resource_lang.data.struct.Size) filetype = get_type(data) md5 = get_md5(data) language = pefile.LANG.get( resource_lang.data.lang, None) sublanguage = pefile.get_sublang_name_for_lang( resource_lang.data.lang, resource_lang.data.sublang) offset = ('%-8s' % hex(resource_lang.data.struct .OffsetToData)).strip() size = ( '%-8s' % hex(resource_lang.data.struct.Size) ).strip() resource = [ count, name, offset, md5, size, filetype, language, sublanguage ] # Dump resources if requested to and if the file currently being # processed is the opened session file. # This is to avoid that during a --scan all the resources being # scanned are dumped as well. if (self.args.open or self.args.dump ) and pe == self.pe: if self.args.dump: folder = self.args.dump else: folder = tempfile.mkdtemp() resource_path = os.path.join( folder, '{0}_{1}_{2}'.format( __sessions__.current.file. md5, offset, name)) resource.append(resource_path) with open(resource_path, 'wb') as resource_handle: resource_handle.write(data) resources.append(resource) count += 1 except Exception as e: self.log('error', e) continue return resources if not self.__check_session(): return # Obtain resources for the currently opened file. resources = get_resources(self.pe) if not resources: self.log('warning', "No resources found") return headers = [ '#', 'Name', 'Offset', 'MD5', 'Size', 'File Type', 'Language', 'Sublanguage' ] if self.args.dump or self.args.open: headers.append('Dumped To') self.log('table', dict(header=headers, rows=resources)) # If instructed, open a session on the given resource. if self.args.open: for resource in resources: if resource[0] == self.args.open: __sessions__.new(resource[8]) return # If instructed to perform a scan across the repository, start looping # through all available files. elif self.args.scan: self.log('info', "Scanning the repository for matching samples...") # Retrieve list of samples stored locally and available in the # database. db = Database() samples = db.find(key='all') matches = [] for sample in samples: # Skip if it's the same file. if sample.sha256 == __sessions__.current.file.sha256: continue # Obtain path to the binary. sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue # Open PE instance. try: cur_pe = pefile.PE(sample_path) except: continue # Obtain the list of resources for the current iteration. cur_resources = get_resources(cur_pe) matched_resources = [] # Loop through entry's resources. for cur_resource in cur_resources: # Loop through opened file's resources. for resource in resources: # If there is a common resource, add it to the list. if cur_resource[3] == resource[3]: matched_resources.append(resource[3]) # If there are any common resources, add the entry to the list # of matched samples. 
if len(matched_resources) > 0: matches.append([ sample.name, sample.md5, '\n'.join(r for r in matched_resources) ]) self.log('info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log( 'table', dict(header=['Name', 'MD5', 'Resource MD5'], rows=matches))
class Commands(object): output = [] def __init__(self): # Open connection to the database. self.db = Database() # Map commands to their related functions. self.commands = dict( help=dict(obj=self.cmd_help, description="Show this help message"), open=dict(obj=self.cmd_open, description="Open a file"), new=dict(obj=self.cmd_new, description="Create new file"), close=dict(obj=self.cmd_close, description="Close the current session"), info=dict(obj=self.cmd_info, description="Show information on the opened file"), notes=dict(obj=self.cmd_notes, description="View, add and edit notes in the current investigation"), clear=dict(obj=self.cmd_clear, description="Clear the console"), store=dict(obj=self.cmd_store, description="Store the opened file to the local repository"), delete=dict(obj=self.cmd_delete, description="Delete the opened file"), find=dict(obj=self.cmd_find, description="Find a file"), tags=dict(obj=self.cmd_tags, description="Modify tags of the opened file"), sessions=dict(obj=self.cmd_sessions, description="List or switch sessions"), stats=dict(obj=self.cmd_stats, description="Collection Statistics"), investigations=dict(obj=self.cmd_investigations, description="List or switch current investigations"), export=dict(obj=self.cmd_export, description="Export the current session to file or zip"), modules=dict(obj=self.cmd_modules, description="List available modules"), integrate=dict(obj=self.cmd_integrate, description="Interact with available integrations"), tokens=dict(obj=self.cmd_tokens, description="Store and retrieve API tokens for integrations and modules"), ) # Output Logging def log(self, event_type, event_data): self.output.append(dict( type=event_type, data=event_data )) ## # CLEAR # # This command simply clears the shell. def cmd_clear(self, *args): os.system('clear') ## # HELP # # This command simply prints the help message. # It lists both embedded commands and loaded modules. 
def cmd_help(self, *args): self.log('info', "Commands") # Build table of commands from commands dict above rows = [] for command_name, command_item in self.commands.items(): rows.append([command_name, command_item['description']]) rows.append(["exit, quit", "Quit CIRTKit"]) rows = sorted(rows, key=lambda entry: entry[0]) self.log('table', dict(header=['Command', 'Description'], rows=rows)) ## # MODULES # # Lists all modules discovered in the modules directory # and lists them by package name, by enumerating # over the directory we can rerun dynamically def cmd_modules(self, *args): parser = argparse.ArgumentParser(prog='modules', description="Lists modules", epilog="You can also specify -r to dynamically pickup new modules") parser.add_argument('-r', '--reload', action='store_true', help="Reload modules") moduleDict = __modules__ try: args = parser.parse_args(args) except: return if args.reload: from lib.core.plugins import load_modules moduleDict = load_modules() rows = [] for module_name, module_item in moduleDict.items(): rows.append([module_name, module_item['description']]) rows = sorted(rows, key=lambda entry: entry[0]) # Build table of modules from the modules available self.log('info', "Modules") self.log('table', dict(header=['Command', 'Description'], rows=rows)) ## # INTEGRATIONS # # Lists all integrations available def cmd_integrate(self, *args): parser = argparse.ArgumentParser(prog='integrate', description="Load integrations") parser.add_argument('-n', '--name', type=str, nargs=1, help="Load integration by name") parser.add_argument('-a', '--all', action='store_true', help="List all available integrations") integrateDict = __integrations__ try: args = parser.parse_args(args) except: return rows = [] for name, desc in integrateDict.items(): rows.append([name, desc['description']]) if args.all: rows = sorted(rows, key=lambda entry: entry[0]) # Build table of integrations self.log('info', "Integrations") self.log('table', dict(header=['Name', 'Description'], rows=rows)) elif args.name: name = args.name[0] item = integrateDict[args.name[0]]['obj']() print_info("Loading {0}\n".format(name)) item.load() else: parser.print_help() ## # TOKENS # # Store and retrieve tokens for app integrations # and for other modules used in cirtkit def cmd_tokens(self, *args): parser = argparse.ArgumentParser(prog='tokens', description="Store and retrieve API tokens") parser.add_argument('-d', '--delete', type=int, metavar="Token_ID", nargs=1, help="Delete token by ID") parser.add_argument('-a', '--add', action='store_true', help="Add a new API token") parser.add_argument('-l', '--list', action='store_true', help="List all configured API tokens") try: args = parser.parse_args(args) except: return if args.delete: tokenid = args.delete self.db.delete_token(tokenid) print_success("Token {0} deleted successfully!".format(tokenid)) elif args.add: print_info("Application new token will be used for:") appname = input("> ") print_info("API Token:") apitoken = input("> ") print_info("Username (if applicable):") username = input("> ") print_info("FQDN of remote server (format: ex.server.com:8000):") hostname = input("> ") if len(username) == 0: username = "" self.db.add_token(apitoken, username, appname, hostname) print_success("Token for {0} added successfully!".format(appname)) elif args.list: # Populate the list of search results. 
items = self.db.get_token_list() rows = [] for item in items: row = [item.id, item.app, item.user, item.fqdn] rows.append(row) if len(rows) < 1: print_info("No token profiles configured") return # Generate a table with the results. header = ['#', 'App', 'User', 'FQDN'] self.log("table", dict(header=header, rows=rows)) else: parser.print_help() ## # NEW # # This command is used to create a new session on a new file, # useful for copy & paste of content like Email headers def cmd_new(self, *args): title = input("Enter a title for the new file: ") # Create a new temporary file. tmp = tempfile.NamedTemporaryFile(delete=False) # Open the temporary file with the default editor, or with nano. os.system('"${EDITOR:-nano}" ' + tmp.name) __sessions__.new(tmp.name) __sessions__.current.file.name = title print_info("New file with title \"{0}\" added to the current session".format(bold(title))) ## # OPEN # # This command is used to open a session on a given file. # It either can be an external file path, or a SHA256 hash of a file which # has been previously imported and stored. # While the session is active, every operation and module executed will be # run against the file specified. def cmd_open(self, *args): parser = argparse.ArgumentParser(prog='open', description="Open a file", epilog="You can also specify a MD5 or SHA256 hash to a previously stored file in order to open a session on it.") group = parser.add_mutually_exclusive_group() group.add_argument('-f', '--file', action='store_true', help="Target is a file") group.add_argument('-u', '--url', action='store_true', help="Target is a URL") group.add_argument('-l', '--last', action='store_true', help="Target is the entry number from the last find command's results") parser.add_argument('-t', '--tor', action='store_true', help="Download the file through Tor") parser.add_argument("value", metavar='PATH, URL, HASH or ID', nargs='*', help="Target to open. Hash can be md5 or sha256. ID has to be from the last search.") try: args = parser.parse_args(args) except: return target = " ".join(args.value) if not args.last and target is None: parser.print_usage() return # If it's a file path, open a session on it. if args.file: target = os.path.expanduser(target) if not os.path.exists(target) or not os.path.isfile(target): self.log('error', "File not found: {0}".format(target)) return __sessions__.new(target) # If it's a URL, download it and open a session on the temporary file. elif args.url: data = download(url=target, tor=args.tor) if data: tmp = tempfile.NamedTemporaryFile(delete=False) tmp.write(data) tmp.close() __sessions__.new(tmp.name) # Try to open the specified file from the list of results from # the last find command. elif args.last: if __sessions__.find: count = 1 for item in __sessions__.find: if count == int(target): __sessions__.new(get_sample_path(item.sha256)) break count += 1 else: self.log('warning', "You haven't performed a find yet") # Otherwise we assume it's an hash of an previously stored sample. else: target = target.strip().lower() if len(target) == 32: key = 'md5' elif len(target) == 64: key = 'sha256' else: parser.print_usage() return rows = self.db.find(key=key, value=target) if not rows: self.log('warning', "No file found with the given hash {0}".format(target)) return path = get_sample_path(rows[0].sha256) if path: __sessions__.new(path) ## # CLOSE # # This command resets the open session. # After that, all handles to the opened file should be closed and the # shell should be restored to the default prompt. 
def cmd_close(self, *args): __sessions__.close() ## # INFO # # This command returns information on the open session. It returns details # on the file (e.g. hashes) and other information that might available from # the database. def cmd_info(self, *args): if __sessions__.is_set(): self.log('table', dict( header=['Key', 'Value'], rows=[ ['Name', __sessions__.current.file.name], ['Tags', __sessions__.current.file.tags], ['Path', __sessions__.current.file.path], ['Size', __sessions__.current.file.size], ['Type', __sessions__.current.file.type], ['Mime', __sessions__.current.file.mime], ['MD5', __sessions__.current.file.md5], ['SHA1', __sessions__.current.file.sha1], ['SHA256', __sessions__.current.file.sha256], ['SHA512', __sessions__.current.file.sha512], ['SSdeep', __sessions__.current.file.ssdeep], ['CRC32', __sessions__.current.file.crc32] ] )) ## # NOTES # # This command allows you to view, add, modify and delete notes associated # with the current investigation. def cmd_notes(self, *args): parser = argparse.ArgumentParser(prog="notes", description="Show information on the current investigation") group = parser.add_mutually_exclusive_group() group.add_argument('-l', '--list', action='store_true', help="List all notes available for the current investigation") group.add_argument('-a', '--add', action='store_true', help="Add a new note to the current investigation") group.add_argument('-v', '--view', metavar='NOTE', help="View the specified note") group.add_argument('-e', '--edit', metavar='NOTE', type=int, help="Edit an existing note") group.add_argument('-d', '--delete', metavar='NOTE', type=int, help="Delete an existing note") notepath = __project__.path + '/notes' notelist = os.listdir(__project__.path + '/notes') try: args = parser.parse_args(args) except: return if __project__.name is None: print_error('Cannot store notes in the default investigation. Please open a new case.') return if args.list: # Retrieve all notes for the currently opened investigation. pass if len(notelist) < 1: self.log('info', "No notes available for this investigation yet") return # Build table of existing case notes rows = [] notecount = 1 for note in notelist: rows.append([notecount, note]) notecount += 1 # Display list of existing notes. self.log('table', dict(header=['ID', 'Title'], rows=rows)) elif args.add: title = input("Enter a title for the new note: ") # Create a new temporary file. tmp = tempfile.NamedTemporaryFile(delete=False) # Open the temporary file with the default editor, or with nano. os.system('"${EDITOR:-nano}" ' + tmp.name) # Once the user is done editing, we need to read the content and # store it in the database. body = tmp.read() # store note in a file with open(notepath + '/' + title, 'w+') as note: note.write(body) # store note in the database # Finally, remove the temporary file. os.remove(tmp.name) self.log('info', "Note with title \"{0}\" added to the current investigation".format(bold(title))) elif args.view: # Retrieve note wth the specified ID and print it. title = args.view note = notepath + '/' + title if os.path.exists(note): self.log('info', bold('Title: ') + title) try: with open(note, 'r') as notehndle: self.log('info', bold('Body:') + '\n' + notehndle.read()) except IOError: print_error("Could not open note by title {0}".format(title)) else: self.log('info', "There is no note with title {0}".format(args.view)) elif args.edit: # Retrieve note with the specified ID. note = Database().get_note(args.edit) if note: # Create a new temporary file. 
tmp = tempfile.NamedTemporaryFile(delete=False) # Write the old body to the temporary file. tmp.write(note.body) tmp.close() # Open the old body with the text editor. os.system('"${EDITOR:-nano}" ' + tmp.name) # Read the new body from the temporary file. body = open(tmp.name, 'r').read() # Update the note entry with the new body. Database().edit_note(args.edit, body) # Remove the temporary file. os.remove(tmp.name) self.log('info', "Updated note with ID {0}".format(args.edit)) elif args.delete: # Delete the note with the specified ID. Database().delete_note(args.delete) else: parser.print_usage() ## # STORE # # This command stores the opened file in the local repository and tries # to store details in the database. def cmd_store(self, *args): parser = argparse.ArgumentParser(prog='store', description="Store the opened file in the current investigation") parser.add_argument('-d', '--delete', action='store_true', help="Delete the original file") parser.add_argument('-f', '--folder', type=str, nargs='+', help="Specify a folder to import") parser.add_argument('-s', '--file-size', type=int, help="Specify a maximum file size") parser.add_argument('-y', '--file-type', type=str, help="Specify a file type pattern") parser.add_argument('-n', '--file-name', type=str, help="Specify a file name pattern") parser.add_argument('-t', '--tags', type=str, nargs='+', help="Specify a list of comma-separated tags") try: args = parser.parse_args(args) except: return if args.folder is not None: # Allows to have spaces in the path. args.folder = " ".join(args.folder) if args.tags is not None: # Remove the spaces in the list of tags args.tags = "".join(args.tags) def add_file(obj, tags=None): if get_sample_path(obj.sha256): self.log('warning', "Skip, file \"{0}\" appears to be already stored".format(obj.name)) return False if __project__.name: pass else: print_error("Must open an investigation to store files") return False # Try to store file object into database. status = self.db.add(obj=obj, tags=tags) if status: # If succeeds, store also in the local repository. # If something fails in the database (for example unicode strings) # we don't want to have the binary lying in the repository with no # associated database record. new_path = store_sample(obj) self.log("success", "Stored file \"{0}\" to {1}".format(obj.name, new_path)) else: return False # Delete the file if requested to do so. if args.delete: try: os.unlink(obj.path) except Exception as e: self.log('warning', "Failed deleting file: {0}".format(e)) return True # If the user specified the --folder flag, we walk recursively and try # to add all contained files to the local repository. # This is not going to open a new session. # TODO: perhaps disable or make recursion optional? if args.folder is not None: # Check if the specified folder is valid. if os.path.isdir(args.folder): # Walk through the folder and subfolders. for dir_name, dir_names, file_names in walk(args.folder): # Add each collected file. for file_name in file_names: file_path = os.path.join(dir_name, file_name) if not os.path.exists(file_path): continue # Check if file is not zero. if not os.path.getsize(file_path) > 0: continue # Check if the file name matches the provided pattern. if args.file_name: if not fnmatch.fnmatch(file_name, args.file_name): # self.log('warning', "Skip, file \"{0}\" doesn't match the file name pattern".format(file_path)) continue # Check if the file type matches the provided pattern. 
if args.file_type: if args.file_type not in File(file_path).type: # self.log('warning', "Skip, file \"{0}\" doesn't match the file type".format(file_path)) continue # Check if file exceeds maximum size limit. if args.file_size: # Obtain file size. if os.path.getsize(file_path) > args.file_size: self.log('warning', "Skip, file \"{0}\" is too big".format(file_path)) continue file_obj = File(file_path) # Add file. add_file(file_obj, args.tags) else: self.log('error', "You specified an invalid folder: {0}".format(args.folder)) # Otherwise we try to store the currently opened file, if there is any. else: if __sessions__.is_set(): if __sessions__.current.file.size == 0: self.log('warning', "Skip, file \"{0}\" appears to be empty".format(__sessions__.current.file.name)) return False # Add file. if add_file(__sessions__.current.file, args.tags): # Open session to the new file. self.cmd_open(*[__sessions__.current.file.sha256]) else: self.log('error', "No session opened") ## # DELETE # # This commands deletes the currenlty opened file (only if it's stored in # the local repository) and removes the details from the database def cmd_delete(self, *args): if __sessions__.is_set(): while True: choice = input("Are you sure you want to delete this binary? Can't be reverted! [y/n] ") if choice == 'y': break elif choice == 'n': return rows = self.db.find('sha256', __sessions__.current.file.sha256) if rows: malware_id = rows[0].id if self.db.delete_file(malware_id): self.log("success", "File deleted") else: self.log('error', "Unable to delete file") os.remove(__sessions__.current.file.path) __sessions__.close() else: self.log('error', "No session opened") ## # FIND # # This command is used to search for files in the database. def cmd_find(self, *args): parser = argparse.ArgumentParser(prog='find', description="Find a file") group = parser.add_mutually_exclusive_group() group.add_argument('-t', '--tags', action='store_true', help="List available tags and quit") group.add_argument('type', nargs='?', choices=["all", "latest", "name", "type", "mime", "md5", "sha256", "tag", "note"], help="Where to search.") parser.add_argument("value", nargs='?', help="String to search.") try: args = parser.parse_args(args) except: return # One of the most useful search terms is by tag. With the --tags # argument we first retrieve a list of existing tags and the count # of files associated with each of them. if args.tags: # Retrieve list of tags. tags = self.db.list_tags() if tags: rows = [] # For each tag, retrieve the count of files associated with it. for tag in tags: count = len(self.db.find('tag', tag.tag)) rows.append([tag.tag, count]) # Generate the table with the results. header = ['Tag', '# Entries'] rows.sort(key=lambda x: x[1], reverse=True) self.log('table', dict(header=header, rows=rows)) else: self.log('warning', "No tags available") return # At this point, if there are no search terms specified, return. if args.type is None: parser.print_usage() return key = args.type if key != 'all' and key != 'latest': try: # The second argument is the search value. value = args.value except IndexError: self.log('error', "You need to include a search term.") return else: value = None # Search all the files matching the given parameters. items = self.db.find(key, value) if not items: return # Populate the list of search results. 
rows = [] count = 1 for item in items: tag = ', '.join([t.tag for t in item.tag if t.tag]) row = [count, item.name, item.mime, item.md5, tag] if key == 'latest': row.append(item.created_at) rows.append(row) count += 1 # Update find results in current session. __sessions__.find = items # Generate a table with the results. header = ['#', 'Name', 'Mime', 'MD5', 'Tags'] if key == 'latest': header.append('Created At') self.log("table", dict(header=header, rows=rows)) ## # TAGS # # This command is used to modify the tags of the opened file. def cmd_tags(self, *args): parser = argparse.ArgumentParser(prog='tags', description="Modify tags of the opened file") parser.add_argument('-a', '--add', metavar='TAG', help="Add tags to the opened file (comma separated)") parser.add_argument('-d', '--delete', metavar='TAG', help="Delete a tag from the opened file") try: args = parser.parse_args(args) except: return # This command requires a session to be opened. if not __sessions__.is_set(): self.log('error', "No session opened") parser.print_usage() return # If no arguments are specified, there's not much to do. # However, it could make sense to also retrieve a list of existing # tags from this command, and not just from the "find" command alone. if args.add is None and args.delete is None: parser.print_usage() return # TODO: handle situation where addition or deletion of a tag fail. db = Database() if not db.find(key='sha256', value=__sessions__.current.file.sha256): self.log('error', "The opened file is not stored in the database. " "If you want to add it use the `store` command.") return if args.add: # Add specified tags to the database's entry belonging to # the opened file. db.add_tags(__sessions__.current.file.sha256, args.add) self.log('info', "Tags added to the currently opened file") # We refresh the opened session to update the attributes. # Namely, the list of tags returned by the 'info' command # needs to be re-generated, or it wouldn't show the new tags # until the existing session is closed a new one is opened. self.log('info', "Refreshing session to update attributes...") __sessions__.new(__sessions__.current.file.path) if args.delete: # Delete the tag from the database. db.delete_tag(args.delete, __sessions__.current.file.sha256) # Refresh the session so that the attributes of the file are # updated. self.log('info', "Refreshing session to update attributes...") __sessions__.new(__sessions__.current.file.path) ### # SESSION # # This command is used to list and switch across all the opened sessions. 
def cmd_sessions(self, *args): parser = argparse.ArgumentParser(prog='sessions', description="Open a file", epilog="List or switch sessions") group = parser.add_mutually_exclusive_group() group.add_argument('-l', '--list', action='store_true', help="List all existing sessions") group.add_argument('-s', '--switch', type=int, help="Switch to the specified session") try: args = parser.parse_args(args) except: return if args.list: if not __sessions__.sessions: self.log('info', "There are no opened sessions") return rows = [] for session in __sessions__.sessions: current = '' if session == __sessions__.current: current = 'Yes' rows.append([ session.id, session.file.name, session.file.md5, session.created_at, current ]) self.log('info', "Opened Sessions:") self.log("table", dict(header=['#', 'Name', 'MD5', 'Created At', 'Current'], rows=rows)) elif args.switch: for session in __sessions__.sessions: if args.switch == session.id: __sessions__.switch(session) return self.log('warning', "The specified session ID doesn't seem to exist") else: parser.print_usage() ## # INVESTIGATIONS # # This command retrieves a list of all projects. # You can also switch to a different project. def cmd_investigations(self, *args): parser = argparse.ArgumentParser(prog='investigations', description="Open a case", epilog="List or switch current investigations") group = parser.add_mutually_exclusive_group() group.add_argument('-l', '--list', action='store_true', help="List all existing investigations") group.add_argument('-s', '--switch', metavar='NAME', help="Switch to the specified investigation") group.add_argument('-d', '--delete', type=int, metavar='ID', help="delete investigation by id.") try: args = parser.parse_args(args) except: return projects_path = os.path.join(os.getcwd(), 'investigations') if not os.path.exists(projects_path): self.log('info', "The investigations directory does not exist yet") return if args.list: self.log('info', "Current Investigations:") rows = [] items = self.db.get_investigation_list() # Populate the list of search results. count = 1 for item in items: row = [item.id, item.name] rows.append(row) self.log('table', dict(header=['ID', 'Name'], rows=rows)) elif args.switch: if __sessions__.is_set(): __sessions__.close() self.log('info', "Closed opened session") __project__.open(args.switch, self.db) self.log('info', "Switched to investigation {0}".format(bold(args.switch))) # Need to re-initialize the Database to open the new SQLite file. self.db = Database() elif args.delete: if __sessions__.is_set(): __sessions__.close() self.log('info', "Closed opened session") __project__.delete(args.delete, self.db) self.log('info', "Deleted investigation {0}".format(bold(args.delete))) # Need to re-initialize the Database to open the new SQLite file. self.db = Database() else: self.log('info', parser.print_usage()) ## # EXPORT # # This command will export the current session to file or zip. def cmd_export(self, *args): parser = argparse.ArgumentParser(prog='export', description="Export the current session to file or zip") parser.add_argument('-z', '--zip', action='store_true', help="Export session in a zip archive") parser.add_argument('value', help="path or archive name") try: args = parser.parse_args(args) except: return # This command requires a session to be opened. if not __sessions__.is_set(): self.log('error', "No session opened") parser.print_usage() return # Check for valid export path. 
if args.value is None: parser.print_usage() return # TODO: having for one a folder and for the other a full # target path can be confusing. We should perhaps standardize this. # Abort if the specified path already exists. if os.path.isfile(args.value): self.log('error', "File at path \"{0}\" already exists, abort".format(args.value)) return # If the argument chosed so, archive the file when exporting it. # TODO: perhaps add an option to use a password for the archive # and default it to "infected". if args.zip: try: with ZipFile(args.value, 'w') as export_zip: export_zip.write(__sessions__.current.file.path, arcname=__sessions__.current.file.name) except IOError as e: self.log('error', "Unable to export file: {0}".format(e)) else: self.log('info', "File archived and exported to {0}".format(args.value)) # Otherwise just dump it to the given directory. else: # XXX: Export file with the original file name. store_path = os.path.join(args.value, __sessions__.current.file.name) try: shutil.copyfile(__sessions__.current.file.path, store_path) except IOError as e: self.log('error', "Unable to export file: {0}".format(e)) else: self.log('info', "File exported to {0}".format(store_path)) ## # Stats # # This command allows you to generate basic statistics for the stored files. def cmd_stats(self, *args): parser = argparse.ArgumentParser(prog='stats', description="Display Database File Statistics") parser.add_argument('-t', '--top', type=int, help='Top x Items') try: args = parser.parse_args(args) except: return arg_top = args.top db = Database() # Set all Counters Dict extension_dict = defaultdict(int) mime_dict = defaultdict(int) tags_dict = defaultdict(int) size_list = [] # Find all items = self.db.find('all') if len(items) < 1: self.log('info', "No items in database to generate stats") return # Sort in to stats for item in items: if '.' 
in item.name: ext = item.name.split('.') extension_dict[ext[-1]] += 1 mime_dict[item.mime] += 1 size_list.append(item.size) for t in item.tag: if t.tag: tags_dict[t.tag] += 1 avg_size = sum(size_list) / len(size_list) all_stats = {'Total':len(items), 'File Extension':extension_dict, 'Mime':mime_dict, 'Tags':tags_dict, 'Avg Size':avg_size, 'Largest':max(size_list), 'Smallest':min(size_list)} # Counter for top x if arg_top: counter = arg_top prefix = 'Top {0} '.format(counter) else: counter = len(items) prefix = '' # Project Stats Last as i have it iterate them all # Print all the results self.log('info', "Projects") self.log('table', dict(header=['Name', 'Count'], rows=[['Main', len(items)], ['Next', '10']])) # For Current Project self.log('info', "Current Project") # Extension self.log('info', "{0}Extensions".format(prefix)) header = ['Ext', 'Count'] rows = [] for k in sorted(extension_dict, key=extension_dict.get, reverse=True)[:counter]: rows.append([k, extension_dict[k]]) self.log('table', dict(header=header, rows=rows)) # Mimes self.log('info', "{0}Mime Types".format(prefix)) header = ['Mime', 'Count'] rows = [] for k in sorted(mime_dict, key=mime_dict.get, reverse=True)[:counter]: rows.append([k, mime_dict[k]]) self.log('table', dict(header=header, rows=rows)) # Tags self.log('info', "{0}Tags".format(prefix)) header = ['Tag', 'Count'] rows = [] for k in sorted(tags_dict, key=tags_dict.get, reverse=True)[:counter]: rows.append([k, tags_dict[k]]) self.log('table', dict(header=header, rows=rows)) # Size self.log('info', "Size Stats") self.log('item', "Largest {0}".format(convert_size(max(size_list)))) self.log('item', "Smallest {0}".format(convert_size(min(size_list)))) self.log('item', "Average {0}".format(convert_size(avg_size)))
def entrypoint(self): if self.args.scan and self.args.cluster: self.log('error', "You selected two exclusive options, pick one") return if self.args.all: db = Database() samples = db.find(key='all') rows = [] for sample in samples: sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE(sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue rows.append([sample.md5, sample.name, cur_ep]) self.log('table', dict(header=['MD5', 'Name', 'AddressOfEntryPoint'], rows=rows)) return if self.args.cluster: db = Database() samples = db.find(key='all') cluster = {} for sample in samples: sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE(sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue if cur_ep not in cluster: cluster[cur_ep] = [] cluster[cur_ep].append([sample.md5, sample.name]) for cluster_name, cluster_members in cluster.items(): # Skipping clusters with only one entry. if len(cluster_members) == 1: continue self.log('info', "AddressOfEntryPoint cluster {0}".format(bold(cluster_name))) self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members)) return if not self.__check_session(): return ep = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint self.log('info', "AddressOfEntryPoint: {0}".format(ep)) if self.args.scan: db = Database() samples = db.find(key='all') rows = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE(sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue if ep == cur_ep: rows.append([sample.md5, sample.name]) self.log('info', "Following are samples with AddressOfEntryPoint {0}".format(bold(ep))) self.log('table', dict(header=['MD5', 'Name'], rows=rows))
def entrypoint(self): if self.args.scan and self.args.cluster: self.log('error', "You selected two exclusive options, pick one") return if self.args.all: db = Database() samples = db.find(key='all') rows = [] for sample in samples: sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE( sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue rows.append([sample.md5, sample.name, cur_ep]) self.log( 'table', dict(header=['MD5', 'Name', 'AddressOfEntryPoint'], rows=rows)) return if self.args.cluster: db = Database() samples = db.find(key='all') cluster = {} for sample in samples: sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE( sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue if cur_ep not in cluster: cluster[cur_ep] = [] cluster[cur_ep].append([sample.md5, sample.name]) for cluster_name, cluster_members in cluster.items(): # Skipping clusters with only one entry. if len(cluster_members) == 1: continue self.log( 'info', "AddressOfEntryPoint cluster {0}".format( bold(cluster_name))) self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members)) return if not self.__check_session(): return ep = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint self.log('info', "AddressOfEntryPoint: {0}".format(ep)) if self.args.scan: db = Database() samples = db.find(key='all') rows = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE( sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue if ep == cur_ep: rows.append([sample.md5, sample.name]) self.log( 'info', "Following are samples with AddressOfEntryPoint {0}".format( bold(ep))) self.log('table', dict(header=['MD5', 'Name'], rows=rows))
def pehash(self): if not HAVE_PEHASH: self.log( 'error', "PEhash is missing. Please copy PEhash to the modules directory of Viper" ) return current_pehash = None if __sessions__.is_set(): current_pehash = calculate_pehash(__sessions__.current.file.path) self.log('info', "PEhash: {0}".format(bold(current_pehash))) if self.args.all or self.args.cluster or self.args.scan: db = Database() samples = db.find(key='all') rows = [] for sample in samples: sample_path = get_sample_path(sample.sha256) pe_hash = calculate_pehash(sample_path) if pe_hash: rows.append((sample.name, sample.md5, pe_hash)) if self.args.all: self.log('info', "PEhash for all files:") header = ['Name', 'MD5', 'PEhash'] self.log('table', dict(header=header, rows=rows)) elif self.args.cluster: self.log('info', "Clustering files by PEhash...") cluster = {} for sample_name, sample_md5, pe_hash in rows: cluster.setdefault(pe_hash, []).append([sample_name, sample_md5]) for item in cluster.items(): if len(item[1]) > 1: self.log('info', "PEhash cluster {0}:".format(bold(item[0]))) self.log('table', dict(header=['Name', 'MD5'], rows=item[1])) elif self.args.scan: if __sessions__.is_set() and current_pehash: self.log('info', "Finding matching samples...") matches = [] for row in rows: if row[1] == __sessions__.current.file.md5: continue if row[2] == current_pehash: matches.append([row[0], row[1]]) if matches: self.log('table', dict(header=['Name', 'MD5'], rows=matches)) else: self.log('info', "No matches found")
def run(self): super(Fuzzy, self).run() if not HAVE_PYDEEP: self.log( 'error', "Missing dependency, install pydeep (`pip install pydeep`)") return arg_verbose = False arg_cluster = False if self.args: if self.args.verbose: arg_verbose = self.args.verbose if self.args.cluster: arg_cluster = self.args.cluster db = Database() samples = db.find(key='all') # Check if we're operating in cluster mode, otherwise we run on the # currently opened file. if arg_cluster: self.log('info', "Generating clusters, this might take a while...") clusters = dict() for sample in samples: if not sample.ssdeep: continue if arg_verbose: self.log( 'info', "Testing file {0} with ssdeep {1}".format( sample.md5, sample.ssdeep)) clustered = False for cluster_name, cluster_members in clusters.items(): # Check if sample is already in the cluster. if sample.md5 in cluster_members: continue if arg_verbose: self.log( 'info', "Testing {0} in cluser {1}".format( sample.md5, cluster_name)) for member in cluster_members: if sample.md5 == member[0]: continue member_hash = member[0] member_name = member[1] member_ssdeep = db.find( key='md5', value=member_hash)[0].ssdeep if pydeep.compare(sample.ssdeep, member_ssdeep) > 40: if arg_verbose: self.log( 'info', "Found home for {0} in cluster {1}". format(sample.md5, cluster_name)) clusters[cluster_name].append( [sample.md5, sample.name]) clustered = True break if not clustered: cluster_id = len(clusters) + 1 clusters[cluster_id] = [ [sample.md5, sample.name], ] ordered_clusters = collections.OrderedDict( sorted(clusters.items())) self.log( 'info', "Following are the identified clusters with more than one member" ) for cluster_name, cluster_members in ordered_clusters.items(): # We include in the results only clusters with more than just # one member. if len(cluster_members) <= 1: continue self.log('info', "Ssdeep cluster {0}".format(bold(cluster_name))) self.log( 'table', dict(header=['MD5', 'Name'], rows=cluster_members)) # We're running against the already opened file. else: if not __sessions__.is_set(): self.log('error', "No session opened") return if not __sessions__.current.file.ssdeep: self.log('error', "No ssdeep hash available for opened file") return matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue if not sample.ssdeep: continue score = pydeep.compare(__sessions__.current.file.ssdeep, sample.ssdeep) if score > 40: matches.append( ['{0}%'.format(score), sample.name, sample.sha256]) if arg_verbose: self.log( 'info', "Match {0}%: {2} [{1}]".format( score, sample.name, sample.sha256)) self.log( 'info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log( 'table', dict(header=['Score', 'Name', 'SHA256'], rows=matches))
def language(self):

    def get_iat(pe):
        iat = []
        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            for peimport in pe.DIRECTORY_ENTRY_IMPORT:
                iat.append(peimport.dll)

        return iat

    def check_module(iat, match):
        for imp in iat:
            if imp.find(match) != -1:
                return True

        return False

    def is_cpp(data, cpp_count):
        for line in data:
            if 'type_info' in line or 'RTTI' in line:
                cpp_count += 1
                break

        if cpp_count == 2:
            return True

        return False

    def is_delphi(data):
        for line in data:
            if 'Borland' in line:
                path = line.split('\\')
                for p in path:
                    if 'Delphi' in p:
                        return True

        return False

    def is_vbdotnet(data):
        for line in data:
            if 'Compiler' in line:
                stuff = line.split('.')
                if 'VisualBasic' in stuff:
                    return True

        return False

    def is_autoit(data):
        for line in data:
            if 'AU3!' in line:
                return True

        return False

    def is_packed(pe):
        for section in pe.sections:
            if section.get_entropy() > 7:
                return True

        return False

    def get_strings(content):
        regexp = '[\x30-\x39\x41-\x5f\x61-\x7a\-\.:]{4,}'
        return re.findall(regexp, content)

    def find_language(iat, sample, content):
        dotnet = False
        cpp_count = 0
        found = None

        # Visual Basic check.
        if check_module(iat, 'VB'):
            self.log('info', "{0} - Possible language: Visual Basic".format(sample.name))
            return 'Visual Basic'

        # .NET check.
        if check_module(iat, 'mscoree.dll') and not found:
            dotnet = True
            found = '.NET'

        # C DLL check.
        if not found and (check_module(iat, 'msvcr') or check_module(iat, 'MSVCR') or check_module(iat, 'c++')):
            cpp_count += 1

        if not found:
            data = get_strings(content)

            if is_cpp(data, cpp_count) and not found:
                found = 'CPP'
            if not found and cpp_count == 1:
                found = 'C'
            if not dotnet and is_delphi(data) and not found:
                found = 'Delphi'
            if dotnet and is_vbdotnet(data):
                found = 'Visual Basic .NET'
            if is_autoit(data) and not found:
                found = 'AutoIt'

        return found

    if not self.__check_session():
        return

    if is_packed(self.pe):
        self.log('warning', "Probably packed, the language guess might be unreliable")

    language = find_language(
        get_iat(self.pe),
        __sessions__.current.file,
        __sessions__.current.file.data
    )

    if language:
        self.log('info', "Probable language: {0}".format(bold(language)))
    else:
        self.log('error', "Programming language not identified")
        return

    if self.args.scan:
        self.log('info', "Scanning the repository for matching samples...")

        db = Database()
        samples = db.find(key='all')

        matches = []
        for sample in samples:
            if sample.sha256 == __sessions__.current.file.sha256:
                continue

            sample_path = get_sample_path(sample.sha256)
            if not os.path.exists(sample_path):
                continue

            try:
                cur_pe = pefile.PE(sample_path)
            except pefile.PEFormatError:
                continue

            cur_packed = ''
            if is_packed(cur_pe):
                cur_packed = 'Yes'

            cur_language = find_language(
                get_iat(cur_pe),
                sample,
                open(sample_path, 'rb').read()
            )

            if not cur_language:
                continue

            if cur_language == language:
                matches.append([sample.name, sample.md5, cur_packed])

        if matches:
            self.log('table', dict(header=['Name', 'MD5', 'Is Packed'], rows=matches))
        else:
            self.log('info', "No matches found")
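# A standalone sketch (hypothetical helper, not part of the module) of the
# entropy heuristic behind is_packed() above: pefile computes Shannon entropy
# per section on a 0-8 scale, and values above 7 usually indicate compressed
# or encrypted data, as produced by packers.
def _example_section_entropy(path):
    import pefile

    pe = pefile.PE(path)
    report = []
    for section in pe.sections:
        # Section names are NUL-padded to 8 bytes in the PE header.
        report.append((section.Name, section.get_entropy()))
    return report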
def imphash(self):
    if self.args.scan and self.args.cluster:
        self.log('error', "You selected two exclusive options, pick one")
        return

    if self.args.cluster:
        self.log('info', "Clustering all samples by imphash...")

        db = Database()
        samples = db.find(key='all')

        cluster = {}
        for sample in samples:
            sample_path = get_sample_path(sample.sha256)
            if not os.path.exists(sample_path):
                continue

            try:
                cur_imphash = pefile.PE(sample_path).get_imphash()
            except:
                continue

            if cur_imphash not in cluster:
                cluster[cur_imphash] = []

            cluster[cur_imphash].append([sample.sha256, sample.name])

        for cluster_name, cluster_members in cluster.items():
            # Skipping clusters with only one entry.
            if len(cluster_members) == 1:
                continue

            self.log('info', "Imphash cluster {0}".format(bold(cluster_name)))
            self.log('table', dict(header=['SHA256', 'Name'], rows=cluster_members))

        return

    if self.__check_session():
        try:
            imphash = self.pe.get_imphash()
        except AttributeError:
            self.log('error', "No imphash support, upgrade pefile to a version >= 1.2.10-139 (`pip install --upgrade pefile`)")
            return

        self.log('info', "Imphash: {0}".format(bold(imphash)))

        if self.args.scan:
            self.log('info', "Scanning the repository for matching samples...")

            db = Database()
            samples = db.find(key='all')

            matches = []
            for sample in samples:
                if sample.sha256 == __sessions__.current.file.sha256:
                    continue

                sample_path = get_sample_path(sample.sha256)
                if not os.path.exists(sample_path):
                    continue

                try:
                    cur_imphash = pefile.PE(sample_path).get_imphash()
                except:
                    continue

                if imphash == cur_imphash:
                    matches.append([sample.name, sample.sha256])

            self.log('info', "{0} relevant matches found".format(bold(len(matches))))

            if len(matches) > 0:
                self.log('table', dict(header=['Name', 'SHA256'], rows=matches))
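# A standalone sketch (illustrative, not part of the module): the imphash
# compared above is an MD5 over the normalized import table (lowercased
# dll/function pairs in import order), so two builds importing the same APIs
# in the same order share it even when the rest of the binary differs.
# pefile >= 1.2.10-139 exposes it directly as get_imphash().
def _example_imphash(path):
    import pefile

    pe = pefile.PE(path)
    return pe.get_imphash()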
def resources(self):

    # Use this function to retrieve resources for the given PE instance.
    # Returns all the identified resources with indicators and attributes.
    def get_resources(pe):
        resources = []
        if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
            count = 1
            for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
                try:
                    resource = []

                    if resource_type.name is not None:
                        name = str(resource_type.name)
                    else:
                        name = pefile.RESOURCE_TYPE.get(resource_type.struct.Id)

                    if name is None:
                        name = str(resource_type.struct.Id)

                    if hasattr(resource_type, 'directory'):
                        for resource_id in resource_type.directory.entries:
                            if hasattr(resource_id, 'directory'):
                                for resource_lang in resource_id.directory.entries:
                                    data = pe.get_data(resource_lang.data.struct.OffsetToData, resource_lang.data.struct.Size)
                                    filetype = get_type(data)
                                    md5 = get_md5(data)
                                    language = pefile.LANG.get(resource_lang.data.lang, None)
                                    sublanguage = pefile.get_sublang_name_for_lang(resource_lang.data.lang, resource_lang.data.sublang)
                                    offset = ('%-8s' % hex(resource_lang.data.struct.OffsetToData)).strip()
                                    size = ('%-8s' % hex(resource_lang.data.struct.Size)).strip()

                                    resource = [count, name, offset, md5, size, filetype, language, sublanguage]

                                    # Dump resources if requested to and if the file currently being
                                    # processed is the opened session file.
                                    # This is to avoid that during a --scan all the resources being
                                    # scanned are dumped as well.
                                    if (self.args.open or self.args.dump) and pe == self.pe:
                                        if self.args.dump:
                                            folder = self.args.dump
                                        else:
                                            folder = tempfile.mkdtemp()

                                        resource_path = os.path.join(folder, '{0}_{1}_{2}'.format(__sessions__.current.file.md5, offset, name))
                                        resource.append(resource_path)

                                        with open(resource_path, 'wb') as resource_handle:
                                            resource_handle.write(data)

                                    resources.append(resource)

                                    count += 1
                except Exception as e:
                    self.log('error', e)
                    continue

        return resources

    if not self.__check_session():
        return

    # Obtain resources for the currently opened file.
    resources = get_resources(self.pe)

    if not resources:
        self.log('warning', "No resources found")
        return

    headers = ['#', 'Name', 'Offset', 'MD5', 'Size', 'File Type', 'Language', 'Sublanguage']
    if self.args.dump or self.args.open:
        headers.append('Dumped To')

    self.log('table', dict(header=headers, rows=resources))

    # If instructed, open a session on the given resource.
    if self.args.open:
        for resource in resources:
            if resource[0] == self.args.open:
                __sessions__.new(resource[8])
                return
    # If instructed to perform a scan across the repository, start looping
    # through all available files.
    elif self.args.scan:
        self.log('info', "Scanning the repository for matching samples...")

        # Retrieve list of samples stored locally and available in the
        # database.
        db = Database()
        samples = db.find(key='all')

        matches = []
        for sample in samples:
            # Skip if it's the same file.
            if sample.sha256 == __sessions__.current.file.sha256:
                continue

            # Obtain path to the binary.
            sample_path = get_sample_path(sample.sha256)
            if not os.path.exists(sample_path):
                continue

            # Open PE instance.
            try:
                cur_pe = pefile.PE(sample_path)
            except:
                continue

            # Obtain the list of resources for the current iteration.
            cur_resources = get_resources(cur_pe)
            matched_resources = []
            # Loop through entry's resources.
            for cur_resource in cur_resources:
                # Loop through opened file's resources.
                for resource in resources:
                    # If there is a common resource, add it to the list.
                    if cur_resource[3] == resource[3]:
                        matched_resources.append(resource[3])

            # If there are any common resources, add the entry to the list
            # of matched samples.
            if len(matched_resources) > 0:
                matches.append([sample.name, sample.md5, '\n'.join(matched_resources)])

        self.log('info', "{0} relevant matches found".format(bold(len(matches))))

        if len(matches) > 0:
            self.log('table', dict(header=['Name', 'MD5', 'Resource MD5'], rows=matches))
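# A standalone sketch (illustrative, not part of the module) of the top level
# of the resource walk performed by get_resources() above: each entry in
# DIRECTORY_ENTRY_RESOURCE carries either an explicit name or a numeric type
# id that pefile can map back to a symbolic name such as RT_ICON.
def _example_resource_types(path):
    import pefile

    pe = pefile.PE(path)
    names = []
    if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
        for entry in pe.DIRECTORY_ENTRY_RESOURCE.entries:
            if entry.name is not None:
                names.append(str(entry.name))
            else:
                # Fall back to the numeric id when no symbolic name is known.
                names.append(pefile.RESOURCE_TYPE.get(entry.struct.Id, str(entry.struct.Id)))
    return names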
def scan(self):

    def string_printable(line):
        line = str(line)
        new_line = ''
        for c in line:
            if c in printstring.printable:
                new_line += c
            else:
                new_line += '\\x' + c.encode('hex')
        return new_line

    # This means users can just drop or remove rule files without
    # having to worry about maintaining the index.
    # TODO: make paths absolute.
    # TODO: this regenerates the file at every run, perhaps we
    # could find a way to optimize this.
    def rule_index():
        tmp_path = os.path.join(tempfile.gettempdir(), 'index.yara')
        with open(tmp_path, 'w') as rules_index:
            for rule_file in os.listdir(self.rule_path):
                # Skip if the extension is not right, could cause problems.
                if not rule_file.endswith('.yar') and not rule_file.endswith('.yara'):
                    continue
                # Skip if it's the index itself.
                if rule_file == 'index.yara':
                    continue

                # Add the rule to the index.
                line = 'include "{0}"\n'.format(os.path.join(self.rule_path, rule_file))
                rules_index.write(line)

        return tmp_path

    arg_rule = self.args.rule
    arg_scan_all = self.args.all
    arg_tag = self.args.tag

    # If no custom ruleset is specified, we use the default one.
    if not arg_rule:
        arg_rule = rule_index()

    # Check if the selected ruleset actually exists.
    if not os.path.exists(arg_rule):
        self.log('error', "No valid Yara ruleset at {0}".format(arg_rule))
        return

    # Compile all rules from given ruleset.
    rules = yara.compile(arg_rule)
    files = []

    # If there is a session open and the user didn't specifically
    # request to scan the full repository, we just add the currently
    # opened file's path.
    if __sessions__.is_set() and not arg_scan_all:
        files.append(__sessions__.current.file)
    # Otherwise we loop through all files in the repository and queue
    # them up for scan.
    else:
        self.log('info', "Scanning all stored files...")

        db = Database()
        samples = db.find(key='all')

        for sample in samples:
            files.append(sample)

    for entry in files:
        if entry.size == 0:
            continue

        self.log('info', "Scanning {0} ({1})".format(entry.name, entry.sha256))

        # Check if the entry has a path attribute. This happens when
        # there is a session open. We need to distinguish this just for
        # the cases where we're scanning an opened file which has not been
        # stored yet.
        if hasattr(entry, 'path'):
            entry_path = entry.path
        # This should be triggered only when scanning the full repository.
        else:
            entry_path = get_sample_path(entry.sha256)

        # Check if the file exists before running the yara scan.
        if not os.path.exists(entry_path):
            self.log('error', "The file does not exist at path {0}".format(entry_path))
            return

        rows = []
        tag_list = []
        for match in rules.match(entry_path):
            # Add a row for each string matched by the rule.
            for string in match.strings:
                rows.append([match.rule, string_printable(string[1]), string_printable(string[0]), string_printable(string[2])])

            # Add matching rules to our list of tags.
            # First it checks if there are tags specified in the metadata
            # of the Yara rule.
            match_tags = match.meta.get('tags')
            # If not, use the rule name.
            # TODO: as we add more and more yara rules, we might remove
            # this option and only tag the file with rules that had
            # tags specified in them.
            if not match_tags:
                match_tags = match.rule

            # Add the tags to the list.
            tag_list.append([entry.sha256, match_tags])

        if rows:
            header = ['Rule', 'String', 'Offset', 'Content']
            self.log('table', dict(header=header, rows=rows))

        # If we selected to add tags do that now.
        if rows and arg_tag:
            db = Database()
            for tag in tag_list:
                db.add_tags(tag[0], tag[1])

            # If in a session reset the session to see tags.
            if __sessions__.is_set() and not arg_scan_all:
                self.log('info', "Refreshing session to update attributes...")
                __sessions__.new(__sessions__.current.file.path)
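# A standalone sketch (illustrative, not part of the module) of the
# yara-python calls scan() is built on: compile a ruleset from a file path,
# match it against a target, and read each match's rule name, metadata and
# matched strings. The (offset, identifier, data) tuple layout assumed here
# is the classic yara-python API that the tuple indexing above targets.
def _example_yara_scan(ruleset_path, target_path):
    import yara

    rules = yara.compile(ruleset_path)
    results = []
    for match in rules.match(target_path):
        for offset, identifier, data in match.strings:
            results.append((match.rule, match.meta.get('tags'), identifier, offset, data))
    return results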