Example #1
    def get_urls(self):
        """Extract all URLs embedded in this file through a simple regex."""
        if not os.path.getsize(self.file_path):
            return []

        # http://stackoverflow.com/a/454589
        urls = set()
        f = open(self.file_path, "rb")
        m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

        for url in re.findall(URL_REGEX, m):
            url = list(url)
            if not url[2]:
                ip_match = re.match(IP_REGEX, url[1])
                if ip_match is None:
                    tld = "".join(url[1].split(".")[-1:])
                    while not is_whitelisted_tld(tld) and tld:
                        url[1] = url[1][:-1]
                        tld = tld[:-1]
                    if not tld:
                        continue
            if not is_whitelisted_domain(url[1]):
                if not is_whitelisted_url("".join(url)):
                    urls.add("".join(url))

        return list(urls)
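
A note on the pattern above: the snippet leans on URL_REGEX, IP_REGEX, and the is_whitelisted_* helpers defined elsewhere in its module. A minimal self-contained sketch of the same mmap-plus-findall technique, with a simplified stand-in pattern (the real URL_REGEX is more involved), might look like:

    import mmap
    import os
    import re

    # Hypothetical stand-in; the project's URL_REGEX is more thorough.
    URL_REGEX = rb"(https?://)([a-zA-Z0-9.-]+)((?:/[^\s\"'<>]*)?)"

    def extract_urls(file_path):
        """Scan a file for URLs without reading it all into memory."""
        # mmap refuses to map empty files, hence the same size guard
        # the snippet applies via os.path.getsize.
        if not os.path.getsize(file_path):
            return []
        urls = set()
        with open(file_path, "rb") as f:
            # Length 0 maps the whole file; ACCESS_READ keeps it read-only.
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m:
                for parts in re.findall(URL_REGEX, m):
                    urls.add(b"".join(parts).decode("ascii", "replace"))
        return sorted(urls)

Mapping the file keeps memory use flat even on very large inputs, which is the point of the stackoverflow reference in the comment.
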
Example #2
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)

                # Let's hope the file is not too big.
                with open(dmp_path, "rb") as f:
                    buf = f.read()
                urls = set()
                for url in re.findall(HTTP_REGEX, buf):
                    if not is_whitelisted_domain(url[1]):
                        urls.add("".join(url))

                proc = dict(
                    file=dmp_path,
                    pid=int(os.path.basename(dmp_path).split("-")[0]),
                    yara=dmp_file.get_yara(
                        os.path.join(CUCKOO_ROOT, "data", "yara",
                                     "index_memory.yar")),
                    urls=list(urls),
                )

                results.append(proc)

        return results
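
The is_whitelisted_domain call is what keeps sandbox background noise (Windows Update traffic and the like) out of the report. A minimal sketch of such a helper, assuming a flat set of known-benign domains (the entries below are purely illustrative):

    # Illustrative entries only; the project maintains its own list of
    # domains that sandboxed machines contact regardless of the sample.
    DOMAIN_WHITELIST = {
        "microsoft.com",
        "windowsupdate.com",
    }

    def is_whitelisted_domain(domain):
        """True if the domain or any parent domain is whitelisted."""
        parts = domain.lower().split(".")
        # Match sub.example.com against example.com as well.
        return any(".".join(parts[i:]) in DOMAIN_WHITELIST
                   for i in range(len(parts) - 1))
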
Example #3
    def get_urls(self):
        """Extract all URLs embedded in this file through a simple regex."""
        if not os.path.getsize(self.file_path):
            return []

        # http://stackoverflow.com/a/454589
        urls = set()
        f = open(self.file_path, "rb")
        m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

        for url in re.findall(URL_REGEX, m):
            url = list(url)
            if not url[2]:
                ip_match = re.match(IP_REGEX, url[1])
                if ip_match is None:
                    tld = "".join(url[1].split(".")[-1:])
                    while not is_whitelisted_tld(tld) and tld:
                        url[1] = url[1][:-1]
                        tld = tld[:-1]
                    if not tld:
                        continue
            if not is_whitelisted_domain(url[1]):
                if not is_whitelisted_url("".join(url)):
                    urls.add("".join(url))

        return list(urls)
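
The while loop above is worth unpacking: it strips trailing junk from a matched domain one character at a time until the suffix is a recognized TLD, and drops the candidate entirely if nothing whitelisted survives. A toy version with a stand-in whitelist (the real is_whitelisted_tld consults a full TLD list):

    # Toy whitelist; the real is_whitelisted_tld covers every valid TLD.
    TLD_WHITELIST = {"com", "net", "org"}

    def trim_to_known_tld(domain):
        """Strip trailing junk until the suffix is a recognized TLD."""
        tld = domain.split(".")[-1]
        while tld and tld not in TLD_WHITELIST:
            domain, tld = domain[:-1], tld[:-1]
        # An empty tld means no whitelisted suffix survived: reject.
        return domain if tld else None

    print(trim_to_known_tld("example.comJUNK"))  # example.com
    print(trim_to_known_tld("example.xyz"))      # None
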
Example #4
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)

                # Let's hope the file is not too big.
                with open(dmp_path, "rb") as f:
                    buf = f.read()
                urls = set()
                for url in re.findall(HTTP_REGEX, buf):
                    if not is_whitelisted_domain(url[1]):
                        urls.add("".join(url))

                proc = dict(
                    file=dmp_path,
                    pid=os.path.splitext(os.path.basename(dmp_path))[0],
                    yara=dmp_file.get_yara(
                        os.path.join(CUCKOO_ROOT, "data", "yara",
                                     "index_memory.yar")),
                    urls=list(urls),
                )

                results.append(proc)

        return results
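
The dmp_file.get_yara(...) call comes from the project's File wrapper. With the yara-python bindings, such a helper typically boils down to something like the following sketch (the result shaping here is hypothetical):

    import yara

    def get_yara_matches(index_path, target_path):
        """Compile a rule index and match it against a single file."""
        rules = yara.compile(filepath=index_path)
        # Each yara match exposes at least the rule name and metadata.
        return [
            {"name": match.rule, "meta": match.meta}
            for match in rules.match(target_path)
        ]
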
Example #5
    def extract_urls(self, filepath):
        # http://stackoverflow.com/a/454589
        urls = set()
        f = open(filepath, "rb")
        m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

        for url in re.findall(HTTP_REGEX, m):
            if not is_whitelisted_domain(url[1]):
                urls.add("".join(url))

        return urls
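
Here url[1] is again the domain component, which implies HTTP_REGEX captures the scheme, the host, and the path as separate groups so that "".join(url) reassembles the full URL. A hypothetical approximation of such a pattern, shown against an in-memory blob:

    import re

    # Assumed group layout: (scheme)(host[:port])(optional path), so
    # url[1] is the domain and b"".join(url) is the whole URL.
    HTTP_REGEX = rb"(https?://)([\w.-]+(?::\d+)?)((?:/[^\s\"'<>]*)?)"

    blob = b"\x00GET http://example.com/a \xffhttps://bad.test:8080/x \x00"
    for url in re.findall(HTTP_REGEX, blob):
        print(b"".join(url).decode(), "| domain:", url[1].decode())
    # http://example.com/a | domain: example.com
    # https://bad.test:8080/x | domain: bad.test:8080
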
Example #6
    def get_urls(self):
        """Extract all URLs embedded in this file through a simple regex."""
        if not os.path.getsize(self.file_path):
            return []

        # http://stackoverflow.com/a/454589
        urls = set()
        f = open(self.file_path, "rb")
        m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

        for url in re.findall(URL_REGEX, m):
            if not is_whitelisted_domain(url[1]):
                urls.add("".join(url))

        return list(urls)
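
One possible refinement, offered as a sketch rather than the project's code: re.finditer streams match objects instead of materializing the full list that re.findall builds, which keeps memory flat when a large dump contains many embedded URLs (the pattern's groups are assumed to concatenate into the full URL, as above):

    import mmap
    import os
    import re

    def iter_urls(file_path, pattern):
        """Lazily yield reassembled URL matches from a mapped file."""
        if not os.path.getsize(file_path):
            return
        with open(file_path, "rb") as f, \
                mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m:
            # finditer avoids building one big list of match tuples;
            # groups(b"") substitutes b"" for unmatched optional groups.
            for match in re.finditer(pattern, m):
                yield b"".join(match.groups(b""))
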