コード例 #1
0
def id(pdf):
    """Print the PDFiD text report for ``pdf``.

    The report is first requested with the ``extraData`` switch enabled. If
    that attempt raises, the report is requested again with the switch
    disabled and a notice is printed ahead of the report.

    NOTE: the name shadows the builtin ``id``; it is kept so existing callers
    continue to work.

    :param pdf: value handed to ``pdfid.PDFiD`` as its first argument.
    :return: None; the report is written to standard output.
    """
    try:
        # Positional switches, per the original author's note:
        # PDFiD(dir, allNames, extraData, disarm, force), then force for
        # PDFiD2String.
        command = pdfid.PDFiD2String(pdfid.PDFiD(pdf, True, True, False, True),
                                     True)
    except Exception:
        # I've observed some files raising errors with the 'extraData' switch
        command = pdfid.PDFiD2String(
            pdfid.PDFiD(pdf, True, False, False, True), True)
        print("[!] PDFiD couldn\'t parse extra data")
    # Printing happens outside the try block so that an error while writing
    # the report is not mistaken for an extraData parsing failure. The
    # single-argument print(...) form emits the same text on Python 2 and 3.
    print(command)
コード例 #2
0
    def run_pdfid(self, data):
        """
        Record PDFiD keyword counts for ``data`` as 'pdfid' results.

        The PDFiD XML document is converted to JSON and every keyword entry is
        passed to ``self._add_result`` with its name and count. When the JSON
        conversion raises UnicodeDecodeError, the keyword names and counts are
        instead pulled out of the serialized XML with a regular expression.
        """
        document = pdfid.PDFiD(data)

        try:
            keyword_stats = json.loads(pdfid.PDFiD2JSON(document, ''))[0]
        except UnicodeDecodeError:
            json_ok = False
        else:
            json_ok = True

        if not json_ok:
            # Fallback: scrape <Keyword Count=".." ... Name=".."> attributes
            # straight from the XML text.
            keyword_pattern = r'<Keyword\sCount="([^\"]+)"[^>]+Name=\"([^\"]+)\"'
            for hits, keyword in re.findall(keyword_pattern, document.toxml()):
                self._add_result('pdfid', keyword, {'count': hits})
            return

        try:
            for entry in keyword_stats['pdfid']['keywords']['keyword']:
                self._add_result('pdfid', entry['name'],
                                 {'count': entry['count']})
        except KeyError:
            # A missing key ends the walk quietly; results added so far stay.
            pass
コード例 #3
0
def pdfid(msg, fileName):
    """Return the PDFiD text report for ``fileName`` if ``msg`` is tagged as a PDF.

    ``msg`` must contain the literal marker ``(.PDF)`` (case-sensitive);
    otherwise nothing is scanned and None is returned.
    """
    if "(.PDF)" not in msg:
        return None

    # Imported on demand so the scanner module is only loaded when a PDF is
    # actually seen; bound to a local alias to keep it distinct from this
    # function's own name.
    import pdfid as scanner

    report_xml = scanner.PDFiD(fileName)
    return scanner.PDFiD2String(report_xml, False)
コード例 #4
0
def id(pdf):
    """Triage ``pdf`` with PDFiD and report suspicious indicators.

    PDFiD is run with the ``extraData`` switch first and, if that raises,
    again without it. The text report is then walked line by line: each
    indicator found is printed and a tag is appended to the ``counter`` list
    defined outside this function ("header", "js", "mucho_javascript",
    "acroform", "aa", "oa", "launch", "embed", "entropy", "page"). The /Page
    and /Pages counts are appended to ``page_counter`` (also defined outside
    this function). Finally ``pdf`` is handed to ``yarascan``.

    NOTE: the name shadows the builtin ``id``; it is kept so existing callers
    continue to work.

    :param pdf: value handed to ``pdfid.PDFiD`` and ``yarascan``.
    :return: None; findings are printed and recorded in ``counter``.
    """
    try:
        # (dir, allNames, extraData, disarm, force), force)
        command = pdfid.PDFiD2String(pdfid.PDFiD(pdf, True, True, False, True),
                                     True,
                                     force=False)
        extra = True
    except Exception:
        # I've observed some files raising errors with the 'extraData' switch
        command = pdfid.PDFiD2String(pdfid.PDFiD(pdf, True, False, False,
                                                 True),
                                     True,
                                     force=False)
        print("[!] PDFiD couldn\'t parse extra data")
        extra = False

    # Bound up front so a report that lacks an entropy line is detected below
    # instead of raising UnboundLocalError.
    tentropy = ientropy = oentropy = None

    for line in command.split('\n'):
        # Whitespace-separated fields of the report line; the indices used
        # below follow the positions the original code relied on.
        count = re.split(r'[\s]+', line)
        if "PDF Header" in line and not re.match(r'%PDF-1\.\d', count[3]):
            counter.append("header")
            print("[-] Invalid version number : \"%s\"" % count[3])
        elif "/Page " in line:
            page_counter.append(count[2])
        elif "/Pages " in line:
            page_counter.append(count[2])
        elif "/JS " in line and not re.match('0', count[2]):
            counter.append("js")
            print("[-] JavaScript count.......: %s" % count[2])
            # Compare the leading integer numerically; comparing the raw
            # field as a string only works by accident of lexical ordering.
            js_total = re.match(r'\d+', count[2])
            if js_total and int(js_total.group()) > 1:
                counter.append("mucho_javascript")
                print("\t[*] That\'s a lot of js ...")
        elif "/AcroForm " in line and not re.match('0', count[2]):
            counter.append("acroform")
            print("[-] AcroForm...............: %s" % count[2])
        elif "/AA " in line and not re.match('0', count[2]):
            counter.append("aa")
            print("[-] Additional Action......: %s" % count[2])
        elif "/OpenAction " in line and not re.match('0', count[2]):
            counter.append("oa")
            print("[-] Open Action............: %s" % count[2])
        elif "/Launch " in line and not re.match('0', count[2]):
            counter.append("launch")
            print("[-] Launch Action..........: %s" % count[2])
        elif "/EmbeddedFiles " in line and not re.match('0', count[2]):
            counter.append("embed")
            print("[-] Embedded File..........: %s" % count[2])
        #elif "trailer" in line and not re.match('0|1', count[2]):
        #    print("[-] Trailer count..........: %s" % count[2])
        #    print("\t[*] Multiple versions detected")
        elif "Total entropy:" in line:
            tentropy = count[3]
            print("[-] Total Entropy..........: %7s" % count[3])
        elif "Entropy inside streams:" in line:
            ientropy = count[4]
            print("[-] Entropy inside streams : %7s" % count[4])
        elif "Entropy outside streams:" in line:
            oentropy = count[4]
            print("[-] Entropy outside streams: %7s" % count[4])

    # Entropy levels:
    # 0 = orderly, 8 = random
    # ASCII text file = ~2/4
    # ZIP archive = ~ 7/8
    # PDF Malicious
    #         - total   : 6.3
    #         - inside  : 6.6
    #         - outside : 4.9
    # PDF Benign
    #         - total   : 6.7
    #         - inside  : 7.2
    #         - outside : 5.1
    # Determine if Total Entropy & Entropy Inside Stream are significantly
    # different than Entropy Outside Streams -> i.e. might indicate a payload
    # w/ long, uncompressed NOP-sled
    # ref = http://blog.didierstevens.com/2009/05/14/malformed-pdf-documents
    if extra:
        entropies = [tentropy, ientropy, oentropy]
        if any(e is None for e in entropies):
            # At least one entropy line never showed up in the report, so
            # there is nothing meaningful to compare.
            print("[!] PDFiD report is missing entropy values, "
                  "skipping entropy checks")
        elif any(e.strip() == 'N/A' for e in entropies):
            print("[-] This file either didnt contain any streams")
            print("    Either this file is a tiny example pdf (unlikely), or it")
            print("    is intensionally hiding something")
            counter.append('entropy')
        else:
            te_long = Decimal(tentropy)
            # The *_short values keep only the first three characters of the
            # reported figure.
            te_short = Decimal(tentropy[0:3])
            ie_short = Decimal(ientropy[0:3])
            oe_long = Decimal(oentropy)
            # I know 'entropy' might get added twice to the counter (doesn't
            # matter) but I wanted to separate these to be alerted on them
            # individually
            # Don't want to apply the "+2" rule if it goes over the max of 8
            togo = 8 - oe_long
            if togo > 2:
                if oe_long + 2 > te_long:
                    counter.append("entropy")
                    print("\t[*] Entropy of outside stream is questionable:")
                    print("\t[-] Outside (%s) +2 (%s) > Total (%s)" %
                          (oe_long, oe_long + 2, te_long))
            elif oe_long > te_long:
                counter.append("entropy")
                print("\t[*] Entropy of outside stream is questionable:")
                print("\t[-] Outside (%s) > Total (%s)" % (oe_long, te_long))
            # Numeric comparison instead of comparing the string forms.
            low_entropy = Decimal("2.0")
            if te_short <= low_entropy or ie_short <= low_entropy:
                counter.append("entropy")
                print("\t[*] LOW entropy detected:")
                print("\t[-] Total (%s) or Inside (%s) <= 2.0" %
                      (te_short, ie_short))

    # Process the /Page(s) results here just to make sure they were both read.
    # NOTE(review): index 0 is treated as /Page and index 1 as /Pages, which
    # presumes the report lists them in that order and that page_counter was
    # empty before this call - confirm against the caller.
    page_count = page_counter[0] if page_counter else None
    pages_count = page_counter[1] if len(page_counter) > 1 else None
    if page_count is None:
        # Previously an IndexError that also prevented the yara scan below.
        print("[!] No /Page or /Pages count was read, skipping page checks")
    elif re.match('0', page_count):
        if pages_count is None:
            print("[!] Only one of /Page and /Pages was read, "
                  "skipping page checks")
        elif re.match('0', pages_count):
            counter.append("page")
            print("[-] Page count suspicious:")
            print("\t[*] Both /Page (%s) and /Pages (%s) = 0" %
                  (page_count, pages_count))
        else:
            counter.append("page")
            print("[-] Page count suspicious, no individual pages defined:")
            print("\t[*] /Page = (%s) , /Pages = (%s)" %
                  (page_count, pages_count))
    elif re.match('1$', page_count):
        counter.append("page")
        print("[-] (1) page PDF")

    yarascan(pdf)