예제 #1
0
 def test_multi(self):
     """Three numbers spread over three lines are all reported, in order.

     The expected offsets (29, 56, 83) are positions within the joined
     three-line text.
     """
     lines = [
         "aio blah blah  $$%^q 32 5432 3096 6798 2217 1740 \n",
         "CCNO: 4109 4533 4898 8521\n",
         "MY PAN%5162702975322904\n",
     ]
     haystack = "".join(lines)
     expected = [
         (29, "3096 6798 2217 1740"),
         (56, "4109 4533 4898 8521"),
         (83, "5162702975322904"),
     ]
     self.assertEqual(panscan(haystack), expected)
예제 #2
0
def plugin(fname, data=None):
    """Scan every cell of an Excel (.xlsx) workbook for PANs and log each hit.

    fname -- path of the workbook; also used as the file label in log entries.
    data  -- optional raw workbook content; when not None the workbook is
             loaded from this in-memory buffer instead of from ``fname``.

    Returns None.  A workbook rejected with InvalidFileException is reported
    through ``log_ignore`` and skipped.
    """
    try:
        # Identity comparison is the correct test for the None singleton.
        if data is not None:
            wb = load_workbook(StringIO.StringIO(data))
        else:
            wb = load_workbook(filename=fname, use_iterators=True)
    except InvalidFileException:
        logger.Logger().log_ignore(fname, "Invalid Excel file")
        return

    for name in wb.get_sheet_names():
        sheet = wb.get_sheet_by_name(name)
        for cell in sheet.get_cell_collection():
            if cell.value is None:
                continue

            # The value is padded with one space on each side before scanning
            # (kept from the original; presumably so a number filling the
            # whole cell still has delimiters -- confirm against panscan).
            value = str(cell.value)
            for p in panscan.panscan(" " + value + " "):
                location = name + " Cell " + cell.column + ":" + str(cell.row)
                logger.Logger().log_pan(fname, p[1], location)
예제 #3
0
파일: xlsx.py 프로젝트: btolab/ccsrch.py
def plugin(fname, data=None):
    """Scan every cell of an Excel (.xlsx) workbook for PANs and log each hit.

    fname -- path of the workbook; also used as the file label in log entries.
    data  -- optional raw workbook content; when not None the workbook is
             loaded from this in-memory buffer instead of from ``fname``.

    Returns None.  A workbook rejected with InvalidFileException is reported
    through ``log_ignore`` and skipped.
    """
    try:
        # "is not None" is the correct test for the None singleton.
        if data is not None:
            flo = StringIO.StringIO(data)
            wb = load_workbook(flo)
        else:
            wb = load_workbook(filename=fname, use_iterators=True)
    except InvalidFileException:
        logger.Logger().log_ignore(fname, "Invalid Excel file")
        return

    for name in wb.get_sheet_names():
        sheet = wb.get_sheet_by_name(name)
        for cell in sheet.get_cell_collection():
            # Empty cells carry no text to scan.
            if cell.value is None:
                continue

            # Scan the cell text, padded with a space on each side as in the
            # original (presumably to give panscan delimiters -- confirm).
            value = str(cell.value)
            pans = panscan.panscan(" " + value + " ")

            # An empty result list simply produces no log entries.
            for p in pans:
                logger.Logger().log_pan(
                    fname, p[1],
                    name + " Cell " + cell.column + ":" + str(cell.row))
예제 #4
0
def plugin(file, data=None):
    """Scan the paragraphs of a Word (.docx) document for PANs and log each hit.

    file -- path of the document; also used as the file label in log entries.
    data -- optional raw document content; when not None the archive is
            opened from this in-memory buffer instead of from ``file``.

    Returns None.  Failures to open the archive, to read
    ``word/document.xml`` or to parse it are reported through ``log_error``
    and end the scan.
    """
    if data is not None:
        try:
            filelikeObj = StringIO.StringIO(data)
        except Exception as e:
            Logger().log_error(e)
            return

        try:
            mydoc = zipfile.ZipFile(filelikeObj)
        except zipfile.BadZipfile as e:
            Logger().log_error(e)
            return
    else:
        try:
            mydoc = zipfile.ZipFile(file)
        except (zipfile.BadZipfile, zipfile.LargeZipFile) as e:
            Logger().log_error(e)
            return

    try:
        xmlcontent = mydoc.read('word/document.xml')
    except (IOError, KeyError) as e:
        # ZipFile.read raises KeyError (not IOError) when the member is
        # missing, e.g. for a zip archive that is not a Word document.
        Logger().log_error(e)
        return
    finally:
        # The archive is only needed for this one read; always release it.
        mydoc.close()

    try:
        document = etree.fromstring(xmlcontent)
    except Exception as e:
        Logger().log_error(e)
        return

    para_tag = '{' + nsprefixes['w'] + '}p'
    text_tag = '{' + nsprefixes['w'] + '}t'

    # Paragraphs without any text are skipped and do not consume a number,
    # exactly as in the original counting scheme.
    paraNum = 0
    for para in document.iter():
        if para.tag != para_tag:
            continue

        paratext = u''.join(
            element.text for element in para.iter()
            if element.tag == text_tag and element.text)
        if not paratext:
            continue
        paraNum += 1

        # Log the matched number (p[1]) and the paragraph *number*; the
        # original passed the whole match tuple and concatenated the
        # paragraph text itself into the message.
        for p in panscan.panscan(paratext):
            logger.Logger().log_pan(
                file, p[1], "Paragraph number " + str(paraNum) + ".")
        paraNum += 1
예제 #5
0
def plugin(file, data=None):
    """Scan the paragraphs of a Word (.docx) document for PANs and log each hit.

    file -- path of the document; also used as the file label in log entries.
    data -- optional raw document content; when not None the archive is
            opened from this in-memory buffer instead of from ``file``.

    Returns None.  Failures to open the archive, to read
    ``word/document.xml`` or to parse it are reported through ``log_error``
    and end the scan.
    """
    # Indentation is normalised to four spaces; the original mixed a tab
    # with spaces, which Python 3 rejects outright.
    if data is not None:
        try:
            filelikeObj = StringIO.StringIO(data)
        except Exception as e:
            Logger().log_error(e)
            return

        try:
            mydoc = zipfile.ZipFile(filelikeObj)
        except zipfile.BadZipfile as e:
            Logger().log_error(e)
            return
    else:
        try:
            mydoc = zipfile.ZipFile(file)
        except (zipfile.BadZipfile, zipfile.LargeZipFile) as e:
            Logger().log_error(e)
            return

    try:
        xmlcontent = mydoc.read('word/document.xml')
    except (IOError, KeyError) as e:
        # A missing archive member surfaces as KeyError, not IOError.
        Logger().log_error(e)
        return
    finally:
        # Nothing else is read from the archive, so release it right away.
        mydoc.close()

    try:
        document = etree.fromstring(xmlcontent)
    except Exception as e:
        Logger().log_error(e)
        return

    # Collect the concatenated text of every non-empty paragraph.
    para_tag = '{' + nsprefixes['w'] + '}p'
    text_tag = '{' + nsprefixes['w'] + '}t'
    paratextlist = []
    for para in document.iter():
        if para.tag != para_tag:
            continue
        pieces = [element.text for element in para.iter()
                  if element.tag == text_tag and element.text]
        paratext = u''.join(pieces)
        if paratext:
            paratextlist.append(paratext)

    # Scan and log.  Numbering starts at 1 and counts non-empty paragraphs
    # only.  The message carries that number (the original concatenated the
    # paragraph text instead) and the matched number p[1] is logged rather
    # than the whole match tuple.
    for paraNum, paratext in enumerate(paratextlist, 1):
        for p in panscan.panscan(paratext):
            logger.Logger().log_pan(
                file, p[1], "Paragraph number " + str(paraNum) + ".")
예제 #6
0
def plugin(fileName, data=None):
    """Extract the text of each PDF page, scan it for PANs and log each hit.

    fileName -- path of the PDF; also used as the file label in log entries.
    data     -- optional raw PDF content; when not None the document is read
                from this in-memory buffer instead of from ``fileName``.

    Returns None.  Any exception while opening the document, counting its
    pages or extracting a page's text is reported through ``log_error`` and
    ends the scan.
    """
    fd = None
    try:
        try:
            if data is None:
                fd = open(fileName, "rb")
                pdf = PdfFileReader(fd)
            else:
                # handle data as a block
                # TODO test this actually works
                pdf = PdfFileReader(StringIO.StringIO(data))
            numPages = pdf.getNumPages()
        except Exception as e:
            Logger().log_error(e)
            return

        for i in range(numPages):
            # Get the page text as one string.  The ASCII-only copy is now
            # actually kept; the original computed it and threw it away.
            try:
                s = pdf.getPage(i).extractText() + "\n"
                s = s.encode("ascii", "ignore")
            except Exception as e:
                Logger().log_error(e)
                return

            # Pages are reported 1-based.
            for p in panscan(s):
                Logger().log_pan(fileName, p[1], "on page %s" % (i + 1))
    finally:
        # Close the file on every exit path, including the early returns
        # that previously leaked the handle.
        if fd is not None:
            fd.close()
예제 #7
0
파일: pdf.py 프로젝트: btolab/ccsrch.py
def plugin(fileName, data=None):
    """Extract the text of each PDF page, scan it for PANs and log each hit.

    fileName -- path of the PDF; also used as the file label in log entries.
    data     -- optional raw PDF content; when not None the document is read
                from this in-memory buffer instead of from ``fileName``.

    Returns None.  Any exception while opening the document, counting its
    pages or extracting a page's text is reported through ``log_error`` and
    ends the scan.
    """
    fd = None
    try:
        # Open the document and find out how many pages it has.
        try:
            if data is None:
                fd = open(fileName, "rb")
                source = fd
            else:
                # handle data as a block
                # TODO test this actually works
                source = StringIO.StringIO(data)
            pdf = PdfFileReader(source)
            numPages = pdf.getNumPages()
        except Exception as e:
            Logger().log_error(e)
            return

        # Iterate over all pages and extract text.
        for i in range(numPages):
            try:
                s = pdf.getPage(i).extractText() + "\n"
                # Keep the ASCII-only result; the original discarded it, so
                # the call had no effect.
                s = s.encode("ascii", "ignore")
            except Exception as e:
                Logger().log_error(e)
                return

            # Log each PAN found, with a 1-based page number.
            for p in panscan(s):
                Logger().log_pan(fileName, p[1], "on page %s" % (i + 1))
    finally:
        # Runs on the error returns too, so the file handle never leaks.
        if fd is not None:
            fd.close()
예제 #8
0
파일: txt.py 프로젝트: btolab/ccsrch.py
def plugin(filename, data):
    """Scan plain text for PANs and log each hit with its offset.

    filename -- path of the file; read only when ``data`` is falsy, and
                always used as the file label in log entries.
    data     -- text to scan; when falsy, the content of ``filename`` is
                read and scanned instead.

    Returns None.  An IOError while opening or reading the file is reported
    through ``log_error`` and ends the scan.
    """
    if not data:
        try:
            # The context manager closes the file on success and on error.
            with open(filename) as f:
                data = f.read()
        except IOError as e:
            Logger().log_error(e)
            # Stop here: the original fell through after a failed read and
            # handed the still-empty ``data`` to panscan.
            return

    for r in panscan(data):
        Logger().log_pan(filename, r[1], "at offset %d" % (r[0]))
예제 #9
0
def plugin(filename, data):
    """Scan plain text for PANs and log each hit with its offset.

    filename -- path of the file; read only when ``data`` is falsy, and
                always used as the file label in log entries.
    data     -- text to scan; when falsy, the content of ``filename`` is
                read and scanned instead.

    Returns None.  An IOError while opening or reading the file is reported
    through ``log_error`` and ends the scan.
    """
    if not data:
        try:
            with open(filename) as f:
                data = f.read()
        except IOError as e:
            # Covers both the open and the read.  Returning here keeps a
            # failed read from reaching panscan with no text, which the
            # original allowed.
            Logger().log_error(e)
            return

    results = panscan(data)
    for r in results:
        Logger().log_pan(filename, r[1], "at offset %d" % (r[0]))
예제 #10
0
def parse(filename, fileType, data=None):
    """Scan the ``content.xml`` member of a zip-based document for PANs.

    filename -- path of the file to scan; also the label used in log entries.
    fileType -- the file's extension, already known to the caller; it is
                accepted for interface compatibility and not used here.
    data     -- optional raw file content; when truthy the archive is opened
                from this in-memory buffer instead of from ``filename``.

    Returns None.  A bad zip file, or an archive without ``content.xml``, is
    reported through ``log_ignore`` and skipped.
    """
    try:
        if data:
            filelikeObj = StringIO.StringIO(data)
            myfile = zipfile.ZipFile(filelikeObj, "r", zipfile.ZIP_DEFLATED)
        else:
            myfile = zipfile.ZipFile(filename, 'r', zipfile.ZIP_DEFLATED)
    except zipfile.BadZipfile:
        logger.Logger().log_ignore(filename, "Bad zip file")
        return

    try:
        xmldata = myfile.read("content.xml")
    except KeyError:
        # ZipFile.read raises KeyError when the archive has no such member.
        logger.Logger().log_ignore(filename, "No content.xml in archive")
        return
    finally:
        # Close the archive even when the read fails.
        myfile.close()

    for p in panscan.panscan(xmldata):
        logger.Logger().log_pan(filename, p[1], "at offset %d" % p[0])
예제 #11
0
def parse(filename, fileType, data=None):
    """Scan the ``content.xml`` member of a zip-based document for PANs.

    filename -- path of the file to scan; also the label used in log entries.
    fileType -- the file's extension, already known to the caller; it is
                accepted for interface compatibility and not used here.
    data     -- optional raw file content; when truthy the archive is opened
                from this in-memory buffer instead of from ``filename``.

    Returns None.  A bad zip file, or an archive without ``content.xml``, is
    reported through ``log_ignore`` and skipped.
    """
    try:
        source = StringIO.StringIO(data) if data else filename
        myfile = zipfile.ZipFile(source, "r", zipfile.ZIP_DEFLATED)
    except zipfile.BadZipfile:
        logger.Logger().log_ignore(filename, "Bad zip file")
        return

    try:
        xmldata = myfile.read("content.xml")
    except KeyError:
        # A missing member is reported by ZipFile.read as KeyError; treat
        # the file as not scannable instead of crashing the scan.
        logger.Logger().log_ignore(filename, "No content.xml in archive")
        return
    finally:
        # Always release the archive, including on a failed read.
        myfile.close()

    pans = panscan.panscan(xmldata)
    for p in pans:
        logger.Logger().log_pan(filename, p[1], "at offset %d" % p[0])
예제 #12
0
def plugin(filename, data=None):
    """Scan every cell of an Excel (.xls) workbook for PANs and log each hit.

    filename -- path of the workbook; also the file label in log entries.
    data     -- optional raw workbook content; when not None it is passed to
                xlrd as ``file_contents`` instead of reading ``filename``.

    Returns None.  A workbook that xlrd rejects with XLRDError is reported
    through ``log_error`` and skipped.
    """
    try:
        if data is not None:
            book = xlrd.open_workbook(filename, file_contents=data)
        else:
            book = xlrd.open_workbook(filename)  # Open an .xls file
    except xlrd.biffh.XLRDError as e:
        # Log and skip an unreadable workbook rather than aborting the scan.
        logger.Logger().log_error(e)
        return

    # iterate over sheets
    for i, sheet_name in enumerate(book.sheet_names()):
        sheet = book.sheet_by_index(i)
        for rownum in range(sheet.nrows):
            # Fetch the row once instead of once (or twice) per column.
            for column, value in enumerate(sheet.row_values(rownum)):
                # scan text for pans and return as a list
                pans = panscan.panscan(" " + unicode(value) + " ")

                # log each pan found
                # NOTE(review): rownum is reported 0-based here -- confirm
                # that is the intended cell reference.
                for p in pans:
                    logger.Logger().log_pan(
                        filename,
                        p[1],
                        "in sheet %s, in cell %s:%d" % (sheet_name, intToChar(column), rownum),
                    )
예제 #13
0
파일: xls.py 프로젝트: btolab/ccsrch.py
def plugin(filename, data=None):
    """Scan every cell of an Excel (.xls) workbook for PANs and log each hit.

    filename -- path of the workbook; also the file label in log entries.
    data     -- optional raw workbook content; when not None it is passed to
                xlrd as ``file_contents`` instead of reading ``filename``.

    Returns None.  A workbook that xlrd rejects with XLRDError is reported
    through ``log_error`` and skipped.
    """
    try:
        if data is not None:
            book = xlrd.open_workbook(filename, file_contents=data)
        else:
            book = xlrd.open_workbook(filename)  # Open an .xls file
    except xlrd.biffh.XLRDError as e:
        Logger().log_error(e)
        return

    # Sheet names are read once; the sheet name and the row values were
    # previously re-fetched inside the innermost loops.
    for i, sheet_name in enumerate(book.sheet_names()):
        sheet = book.sheet_by_index(i)
        for rownum in range(sheet.nrows):
            for column, value in enumerate(sheet.row_values(rownum)):
                # The cell text is padded with one space on each side
                # before scanning, as in the original.
                pans = panscan.panscan(" " + unicode(value) + " ")

                # NOTE(review): rownum is reported 0-based -- confirm that
                # is the intended cell reference.
                for p in pans:
                    Logger().log_pan(
                        filename, p[1], "in sheet %s, in cell %s:%d" %
                        (sheet_name, inttochar(column), rownum))
예제 #14
0
 def test_all_numbers(self):
     """Digit runs of these particular lengths must not produce any match."""
     digits_only = "11238947524713897214 128951673475 435413  483576107"
     matches = panscan(digits_only)
     self.assertEqual(matches, [])
예제 #15
0
 def test_no_numbers(self):
     """Text containing no digits at all must not produce any match."""
     letters_only = "wiubfoiranvquwrhruehig uhegw\nweowiuhwfe  weoif$%&wgoirgh\n"
     matches = panscan(letters_only)
     self.assertEqual(matches, [])
예제 #16
0
 def test_invalid_jcb2(self):
     """The unspaced 15-digit number 213118001438560 must not be reported."""
     candidate = "213118001438560 "
     matches = panscan(candidate)
     self.assertEqual(matches, [])
예제 #17
0
 def test_valid_jcb2(self):
     """A 15-digit number with a space after every digit is found at offset 0."""
     candidate = "2 1 3 1 1 8 0 0 0 4 0 8 5 6 0 "
     expected = [(0, "2 1 3 1 1 8 0 0 0 4 0 8 5 6 0")]
     self.assertEqual(panscan(candidate), expected)
예제 #18
0
 def test_valid_jcb(self):
     """The space-grouped number ending in 1740 is found at offset 0."""
     candidate = "3096 6798 2217 1740 "
     expected = [(0, "3096 6798 2217 1740")]
     self.assertEqual(panscan(candidate), expected)
예제 #19
0
 def test_invalid_short_visa(self):
     """The 13-digit number ending in 6 must not be reported."""
     candidate = "4410 5535 6357 6 "
     matches = panscan(candidate)
     self.assertEqual(matches, [])
예제 #20
0
 def test_invalid_visa(self):
     """The 16-digit number ending in 8520 must not be reported."""
     candidate = "4109 4533 4898 8520\n"
     matches = panscan(candidate)
     self.assertEqual(matches, [])
예제 #21
0
 def test_valid_short_visa(self):
     """The 13-digit number ending in 5 is found at offset 0."""
     candidate = "4410 5535 6357 5 "
     expected = [(0, "4410 5535 6357 5")]
     self.assertEqual(panscan(candidate), expected)
예제 #22
0
 def test_invalid_visa(self):
     """No match is expected for the 16-digit number ending in 8520."""
     sample = "4109 4533 4898 8520\n"
     nothing = []
     self.assertEqual(panscan(sample), nothing)
예제 #23
0
 def test_valid_visa(self):
     """The 16-digit number ending in 8521 is found at offset 0."""
     candidate = "4109 4533 4898 8521\n"
     expected = [(0, "4109 4533 4898 8521")]
     self.assertEqual(panscan(candidate), expected)
예제 #24
0
 def test_invalid_amex34(self):
     """The 15-digit number starting 34 and ending in 805 must not be reported."""
     candidate = "3401 1825 3784 805 "
     matches = panscan(candidate)
     self.assertEqual(matches, [])
예제 #25
0
 def test_valid_amex34(self):
     """The 15-digit number starting 34 and ending in 804 is found at offset 0."""
     candidate = "3401 1825 3784 804 "
     expected = [(0, "3401 1825 3784 804")]
     self.assertEqual(panscan(candidate), expected)
예제 #26
0
 def test_invalid_mastercard(self):
     """The unspaced 16-digit number 8293396262570410 must not be reported."""
     candidate = "8293396262570410"
     matches = panscan(candidate)
     self.assertEqual(matches, [])
예제 #27
0
 def test_valid_mastercard(self):
     """The unspaced 16-digit number 5162702975322904 is found at offset 0."""
     candidate = "5162702975322904\n"
     expected = [(0, "5162702975322904")]
     self.assertEqual(panscan(candidate), expected)
예제 #28
0
 def test_invalid_jcb(self):
     """The space-grouped number ending in 1741 must not be reported."""
     candidate = "3096 6798 2217 1741 "
     matches = panscan(candidate)
     self.assertEqual(matches, [])
예제 #29
0
 def test_valid_amex37(self):
     """The 15-digit number starting 37 and ending in 132 is found at offset 0."""
     candidate = "3730 4625 0745 132 "
     expected = [(0, "3730 4625 0745 132")]
     self.assertEqual(panscan(candidate), expected)
예제 #30
0
 def test_valid_short_visa(self):
     """Expect a single match at offset 0 for the 13-digit number ending in 5."""
     sample = "4410 5535 6357 5 "
     hits = panscan(sample)
     self.assertEqual(hits, [(0, "4410 5535 6357 5")])
예제 #31
0
 def test_valid_mastercard(self):
     """Expect a single match at offset 0 for 5162702975322904."""
     sample = "5162702975322904\n"
     hits = panscan(sample)
     self.assertEqual(hits, [(0, "5162702975322904")])
예제 #32
0
 def test_invalid_amex37(self):
     """The 15-digit number starting 37 and ending in 133 must not be reported."""
     candidate = "3730 4625 0745 133 "
     matches = panscan(candidate)
     self.assertEqual(matches, [])
예제 #33
0
 def test_valid_visa(self):
     """Expect a single match at offset 0 for the number ending in 8521."""
     sample = "4109 4533 4898 8521\n"
     hits = panscan(sample)
     self.assertEqual(hits, [(0, "4109 4533 4898 8521")])
예제 #34
0
 def test_valid_diners(self):
     """The 14-digit number ending in 97 is found at offset 0."""
     candidate = "3001 7970 1170 97 "
     expected = [(0, "3001 7970 1170 97")]
     self.assertEqual(panscan(candidate), expected)
예제 #35
0
 def test_invalid_short_visa(self):
     """No match is expected for the 13-digit number ending in 6."""
     sample = "4410 5535 6357 6 "
     nothing = []
     self.assertEqual(panscan(sample), nothing)
예제 #36
0
 def test_invalid_diners(self):
     """The 14-digit number ending in 99 must not be reported."""
     candidate = "3001 7970 1170 99 "
     matches = panscan(candidate)
     self.assertEqual(matches, [])