def test_multi(self):
    # Three lines of input, each carrying one card number surrounded by
    # other text; every number must be reported as an (offset, match) pair.
    # NOTE(review): the expected offsets depend on the exact whitespace
    # inside these literals - confirm against the original file.
    pieces = [
        "aio blah blah $$%^q 32 5432 3096 6798 2217 1740 \n",
        "CCNO: 4109 4533 4898 8521\n",
        "MY PAN%5162702975322904\n",
    ]
    sample = "".join(pieces)
    expected = [
        (29, "3096 6798 2217 1740"),
        (56, "4109 4533 4898 8521"),
        (83, "5162702975322904"),
    ]
    self.assertEqual(panscan(sample), expected)
def plugin(fname, data=None):
    """Scan every cell of an Excel workbook for PANs and log each hit.

    fname: path of the workbook; also the file label used in log entries.
    data:  optional raw workbook contents. When given, the workbook is
           loaded from an in-memory buffer instead of from fname.

    Returns None. If loading raises InvalidFileException the file is logged
    as ignored and nothing is scanned.
    """
    try:
        if data is not None:
            wb = load_workbook(StringIO.StringIO(data))
        else:
            wb = load_workbook(filename=fname, use_iterators=True)
    except InvalidFileException:
        logger.Logger().log_ignore(fname, "Invalid Excel file")
        return

    for name in wb.get_sheet_names():
        sheet = wb.get_sheet_by_name(name)
        for cell in sheet.get_cell_collection():
            # Identity test: only a truly empty cell is skipped; values such
            # as 0 or "" are still scanned.
            if cell.value is None:
                continue
            # The value is padded with one space on each side before the
            # scan - presumably so a number filling the whole cell still has
            # delimiters around it; confirm against panscan.
            value = str(cell.value)
            # panscan yields (offset, pan) pairs; only the pan is logged and
            # the cell reference serves as the location.
            for p in panscan.panscan(" " + value + " "):
                logger.Logger().log_pan(
                    fname, p[1],
                    name + " Cell " + cell.column + ":" + str(cell.row))
def plugin(fname, data=None):
    """Scan every cell of an Excel workbook for PANs and log each hit.

    fname: path of the workbook; also the file label used in log entries.
    data:  optional raw workbook contents. When given, the workbook is
           loaded from an in-memory buffer instead of from fname.

    Returns None. If loading raises InvalidFileException the file is logged
    as ignored and nothing is scanned.
    """
    try:
        if data is not None:
            flo = StringIO.StringIO(data)
            wb = load_workbook(flo)
        else:
            wb = load_workbook(filename=fname, use_iterators=True)
    except InvalidFileException:
        logger.Logger().log_ignore(fname, "Invalid Excel file")
        return

    sheetNames = wb.get_sheet_names()
    for name in sheetNames:
        sheet = wb.get_sheet_by_name(name)
        for cell in sheet.get_cell_collection():
            # Skip only truly empty cells; falsy values like 0 are scanned.
            if cell.value is None:
                continue
            # Scan the cell text, padded with a space on each side
            # (presumably to give panscan delimiters - confirm).
            value = str(cell.value)
            pans = panscan.panscan(" " + value + " ")
            # Each result is an (offset, pan) pair; log the pan together
            # with the sheet name and cell reference.
            for p in pans:
                logger.Logger().log_pan(
                    fname, p[1],
                    name + " Cell " + cell.column + ":" + str(cell.row))
def plugin(file, data=None):
    """Scan the paragraphs of a .docx document for PANs and log each hit.

    file: path of the document; also the file label used in log entries.
    data: optional raw document contents. When given, the archive is opened
          from an in-memory buffer instead of from the path.

    Returns None. Any failure to open the archive, read
    'word/document.xml' or parse it is logged via log_error and ends the
    scan.
    """
    # Open the document as a zip archive.
    if data is not None:
        try:
            filelikeObj = StringIO.StringIO(data)
        except Exception as e:
            Logger().log_error(e)
            return
        try:
            mydoc = zipfile.ZipFile(filelikeObj)
        except zipfile.BadZipfile as e:
            Logger().log_error(e)
            return
    else:
        try:
            mydoc = zipfile.ZipFile(file)
        except (zipfile.BadZipfile, zipfile.LargeZipFile) as e:
            Logger().log_error(e)
            return

    # ZipFile.read raises KeyError when the member is absent (e.g. a zip
    # that is not a Word document), so that is handled alongside IOError.
    # The archive is closed on every path once the XML has been read.
    try:
        xmlcontent = mydoc.read('word/document.xml')
    except (IOError, KeyError, zipfile.BadZipfile) as e:
        Logger().log_error(e)
        return
    finally:
        mydoc.close()

    try:
        document = etree.fromstring(xmlcontent)
    except Exception as e:
        Logger().log_error(e)
        return

    # Collect the text of every non-empty paragraph: the concatenation of
    # all w:t text runs found beneath each w:p element.
    para_tag = '{' + nsprefixes['w'] + '}p'
    text_tag = '{' + nsprefixes['w'] + '}t'
    paralist = [el for el in document.iter() if el.tag == para_tag]
    paratextlist = []
    for para in paralist:
        paratext = u''.join(
            el.text for el in para.iter() if el.tag == text_tag and el.text)
        if paratext:
            paratextlist.append(paratext)

    # Scan and log. Paragraph numbers are 1-based and count only the
    # non-empty paragraphs collected above.
    for paraNum, paratext in enumerate(paratextlist, 1):
        for p in panscan.panscan(paratext):
            # p is an (offset, pan) pair; log the pan itself.
            logger.Logger().log_pan(
                file, p[1], "Paragraph number %d." % paraNum)
def plugin(file, data=None):
    """Scan the paragraphs of a .docx document for PANs and log each hit.

    file: path of the document; also the file label used in log entries.
    data: optional raw document contents. When given, the archive is opened
          from an in-memory buffer instead of from the path.

    Returns None. Any failure to open the archive, read
    'word/document.xml' or parse it is logged via log_error and ends the
    scan.
    """
    # Open the document as a zip archive.
    if data is not None:
        try:
            filelikeObj = StringIO.StringIO(data)
        except Exception as e:
            Logger().log_error(e)
            return
        try:
            mydoc = zipfile.ZipFile(filelikeObj)
        except zipfile.BadZipfile as e:
            Logger().log_error(e)
            return
    else:
        try:
            mydoc = zipfile.ZipFile(file)
        except (zipfile.BadZipfile, zipfile.LargeZipFile) as e:
            Logger().log_error(e)
            return

    # A missing archive member makes ZipFile.read raise KeyError rather than
    # IOError, so both are caught; the archive is always closed afterwards.
    try:
        xmlcontent = mydoc.read('word/document.xml')
    except (IOError, KeyError, zipfile.BadZipfile) as e:
        Logger().log_error(e)
        return
    finally:
        mydoc.close()

    try:
        document = etree.fromstring(xmlcontent)
    except Exception as e:
        Logger().log_error(e)
        return

    # Create a list of paragraph texts: for each w:p element, join the text
    # of every w:t element beneath it and keep the non-empty results.
    para_tag = '{' + nsprefixes['w'] + '}p'
    text_tag = '{' + nsprefixes['w'] + '}t'
    paratextlist = []
    paralist = [element for element in document.iter()
                if element.tag == para_tag]
    for para in paralist:
        runs = [element.text for element in para.iter()
                if element.tag == text_tag and element.text]
        paratext = u''.join(runs)
        if paratext:
            paratextlist.append(paratext)

    # Scan and log; numbering starts at 1 and skips empty paragraphs
    # because those were never added to the list.
    paraNum = 1
    for paratext in paratextlist:
        for p in panscan.panscan(paratext):
            # p is an (offset, pan) pair; only the pan is logged.
            logger.Logger().log_pan(
                file, p[1], "Paragraph number %d." % paraNum)
        paraNum += 1
def plugin(fileName, data=None):
    """Extract the text of each page of a PDF and log any PANs found.

    fileName: path of the PDF; also the file label used in log entries.
    data:     optional raw PDF contents. When given, the PDF is read from
              an in-memory buffer instead of from fileName.

    Returns None. Any exception while opening the PDF, counting its pages
    or extracting a page's text is logged via log_error and ends the scan.
    """
    fd = None
    try:
        try:
            if data is None:
                fd = open(fileName, "rb")
                pdf = PdfFileReader(fd)
            else:
                # handle data as a block
                # TODO test this actually works
                pdf = PdfFileReader(StringIO.StringIO(data))
            numPages = pdf.getNumPages()
        except Exception as e:
            Logger().log_error(e)
            return

        for i in range(numPages):
            # Extract the page text as a single string.
            try:
                s = pdf.getPage(i).extractText() + "\n"
                # Keep the encoded result: the original discarded it, which
                # made the ASCII-only conversion a no-op.
                s = s.encode("ascii", "ignore")
            except Exception as e:
                Logger().log_error(e)
                return

            # panscan yields (offset, pan) pairs; pages are reported
            # 1-based.
            for p in panscan(s):
                Logger().log_pan(fileName, p[1], "on page %s" % (i + 1))
    finally:
        # Close the file on every exit path, including the early returns
        # above; nothing to close when the PDF came from data.
        if fd is not None:
            fd.close()
def plugin(fileName, data=None):
    """Extract the text of each page of a PDF and log any PANs found.

    fileName: path of the PDF; also the file label used in log entries.
    data:     optional raw PDF contents. When given, the PDF is read from
              an in-memory buffer instead of from fileName.

    Returns None. Any exception while opening the PDF, counting its pages
    or extracting a page's text is logged via log_error and ends the scan.
    """
    fd = None
    try:
        # Build the reader from the file or from the supplied data block.
        try:
            if data is None:
                fd = open(fileName, "rb")
                pdf = PdfFileReader(fd)
            else:
                # TODO test this actually works
                flo = StringIO.StringIO(data)
                pdf = PdfFileReader(flo)
        except Exception as e:
            Logger().log_error(e)
            return

        # find how many pages the document has
        try:
            numPages = pdf.getNumPages()
        except Exception as e:
            Logger().log_error(e)
            return

        # iterate over all pages and extract text
        for pageIndex in range(numPages):
            try:
                s = pdf.getPage(pageIndex).extractText() + "\n"
                # The encoded text must be assigned back; calling encode()
                # without using its result changes nothing.
                s = s.encode("ascii", "ignore")
            except Exception as e:
                Logger().log_error(e)
                return

            # log each pan found, with a 1-based page number
            for p in panscan(s):
                Logger().log_pan(
                    fileName, p[1], "on page %s" % (pageIndex + 1))
    finally:
        # Runs on the early returns too, so the handle never leaks.
        if fd is not None:
            fd.close()
def plugin(filename, data):
    """Scan plain text for PANs and log each hit with its offset.

    filename: path of the file; also the file label used in log entries.
    data:     text to scan. When falsy (None or empty) the text is read
              from filename instead.

    Returns None. If the file cannot be opened or read, the IOError is
    logged via log_error and nothing is scanned.
    """
    if not data:
        try:
            # The with-block closes the file even if read() fails.
            with open(filename) as f:
                data = f.read()
        except IOError as e:
            Logger().log_error(e)
            # Stop here: previously a failed read fell through and the scan
            # ran on the empty data.
            return

    # panscan yields (offset, pan) pairs.
    for r in panscan(data):
        Logger().log_pan(filename, r[1], "at offset %d" % (r[0]))
def parse(filename, fileType, data=None):
    """Scan the content.xml of a zip-based document for PANs and log them.

    filename: path of the file to parse; also the label used in log entries.
    fileType: just the extension of the file, supplied by the caller. It is
              not used by this function.
    data:     optional raw file contents. When truthy, the archive is opened
              from an in-memory buffer instead of from filename.

    Returns None. A bad zip file, or an archive without content.xml, is
    logged as ignored.
    """
    try:
        if data:
            filelikeObj = StringIO.StringIO(data)
            myfile = zipfile.ZipFile(filelikeObj, "r", zipfile.ZIP_DEFLATED)
        else:
            myfile = zipfile.ZipFile(filename, "r", zipfile.ZIP_DEFLATED)
    except zipfile.BadZipfile:
        logger.Logger().log_ignore(filename, "Bad zip file")
        return

    # ZipFile.read raises KeyError when the member is missing; treat such an
    # archive as not scannable instead of crashing. The archive is closed on
    # every path.
    try:
        xmldata = myfile.read("content.xml")
    except KeyError:
        logger.Logger().log_ignore(filename, "No content.xml in archive")
        return
    finally:
        myfile.close()

    # The raw XML is scanned directly, so offsets refer to positions within
    # content.xml.
    for p in panscan.panscan(xmldata):
        logger.Logger().log_pan(filename, p[1], "at offset %d" % p[0])
def parse(filename, fileType, data=None):
    """Scan the content.xml of a zip-based document for PANs and log them.

    filename: path of the file to parse; also the label used in log entries.
    fileType: just the extension of the file, supplied by the caller. It is
              not used by this function.
    data:     optional raw file contents. When truthy, the archive is opened
              from an in-memory buffer instead of from filename.

    Returns None. A bad zip file, or an archive without content.xml, is
    logged as ignored.
    """
    # Pick the archive source, then open it in one place.
    if data:
        source = StringIO.StringIO(data)
    else:
        source = filename
    try:
        myfile = zipfile.ZipFile(source, "r", zipfile.ZIP_DEFLATED)
    except zipfile.BadZipfile:
        logger.Logger().log_ignore(filename, "Bad zip file")
        return

    # A missing member surfaces as KeyError from ZipFile.read; log it and
    # stop. The finally clause guarantees the archive is closed either way.
    try:
        xmldata = myfile.read("content.xml")
    except KeyError:
        logger.Logger().log_ignore(filename, "No content.xml in archive")
        return
    finally:
        myfile.close()

    # Offsets reported here are positions within the raw content.xml.
    pans = panscan.panscan(xmldata)
    for p in pans:
        logger.Logger().log_pan(filename, p[1], "at offset %d" % p[0])
def plugin(filename, data=None):
    """Scan every cell of an .xls workbook for PANs and log each hit.

    filename: path of the workbook; also the label used in log entries.
    data:     optional raw workbook contents, passed to xlrd as
              file_contents when not None.

    Returns None. If xlrd rejects the workbook with XLRDError, the error is
    logged via log_error and nothing is scanned.
    """
    try:
        if data is not None:
            book = xlrd.open_workbook(filename, file_contents=data)
        else:
            book = xlrd.open_workbook(filename)  # Open an .xls file
    except xlrd.biffh.XLRDError as e:
        logger.Logger().log_error(e)
        return

    # iterate over sheets
    for i, sheet_name in enumerate(book.sheet_names()):
        sheet = book.sheet_by_index(i)
        for rownum in range(sheet.nrows):
            # Fetch the row once instead of once per cell.
            row = sheet.row_values(rownum)
            for column, value in enumerate(row):
                # Scan the cell text, padded with a space on each side
                # (presumably to give panscan delimiters - confirm).
                pans = panscan.panscan(" " + unicode(value) + " ")
                # NOTE(review): rownum is the 0-based xlrd row index, so the
                # first row is reported as 0 - confirm this is intended.
                for p in pans:
                    logger.Logger().log_pan(
                        filename,
                        p[1],
                        "in sheet %s, in cell %s:%d"
                        % (sheet_name, intToChar(column), rownum),
                    )
def plugin(filename, data=None):
    """Scan every cell of an .xls workbook for PANs and log each hit.

    filename: path of the workbook; also the label used in log entries.
    data:     optional raw workbook contents, passed to xlrd as
              file_contents when not None.

    Returns None. If xlrd rejects the workbook with XLRDError, the error is
    logged via log_error and nothing is scanned.
    """
    try:
        if data is not None:
            book = xlrd.open_workbook(filename, file_contents=data)
        else:
            book = xlrd.open_workbook(filename)  # Open an .xls file
    except xlrd.biffh.XLRDError as e:
        Logger().log_error(e)
        return

    # iterate over sheets
    for i, sheet_name in enumerate(book.sheet_names()):
        sheet = book.sheet_by_index(i)
        for rownum in range(sheet.nrows):
            # Read the row a single time; the original re-read it for every
            # cell in the row.
            row = sheet.row_values(rownum)
            for column, value in enumerate(row):
                # Scan the cell text, padded with a space on each side
                # (presumably to give panscan delimiters - confirm).
                pans = panscan.panscan(" " + unicode(value) + " ")
                # NOTE(review): rownum is the 0-based xlrd row index, so the
                # first row is reported as 0 - confirm this is intended.
                for p in pans:
                    Logger().log_pan(
                        filename, p[1],
                        "in sheet %s, in cell %s:%d"
                        % (sheet_name, inttochar(column), rownum))
def test_all_numbers(self):
    # Input made up solely of digit groups; none of them may be reported.
    sample = "11238947524713897214 128951673475 435413 483576107"
    found = panscan(sample)
    self.assertEqual(found, [])
def test_no_numbers(self):
    # Two lines of letters and punctuation with no digits at all; the scan
    # must come back empty.
    sample = "wiubfoiranvquwrhruehig uhegw\nweowiuhwfe weoif$%&wgoirgh\n"
    found = panscan(sample)
    self.assertEqual(found, [])
def test_invalid_jcb2(self):
    # An unspaced 15-digit number that panscan is expected to reject.
    sample = "213118001438560 "
    found = panscan(sample)
    self.assertEqual(found, [])
def test_valid_jcb2(self):
    # Digits separated by single spaces must still be matched, starting at
    # offset 0; the expected match omits the trailing space.
    sample = "2 1 3 1 1 8 0 0 0 4 0 8 5 6 0 "
    expected = [(0, "2 1 3 1 1 8 0 0 0 4 0 8 5 6 0")]
    self.assertEqual(panscan(sample), expected)
def test_valid_jcb(self):
    # A 16-digit number in groups of four, expected at offset 0 without the
    # trailing space.
    sample = "3096 6798 2217 1740 "
    expected = [(0, "3096 6798 2217 1740")]
    self.assertEqual(panscan(sample), expected)
def test_invalid_short_visa(self):
    # A 13-digit number beginning with 4 that panscan is expected to reject.
    sample = "4410 5535 6357 6 "
    found = panscan(sample)
    self.assertEqual(found, [])
def test_invalid_visa(self):
    # A 16-digit number beginning with 4, newline-terminated, that panscan
    # is expected to reject.
    sample = "4109 4533 4898 8520\n"
    found = panscan(sample)
    self.assertEqual(found, [])
def test_valid_short_visa(self):
    # A 13-digit number beginning with 4, expected at offset 0 without the
    # trailing space.
    sample = "4410 5535 6357 5 "
    expected = [(0, "4410 5535 6357 5")]
    self.assertEqual(panscan(sample), expected)
def test_valid_visa(self):
    # A 16-digit number beginning with 4, expected at offset 0 without the
    # terminating newline.
    sample = "4109 4533 4898 8521\n"
    expected = [(0, "4109 4533 4898 8521")]
    self.assertEqual(panscan(sample), expected)
def test_invalid_amex34(self):
    # A 15-digit number beginning with 34 that panscan is expected to
    # reject.
    sample = "3401 1825 3784 805 "
    found = panscan(sample)
    self.assertEqual(found, [])
def test_valid_amex34(self):
    # A 15-digit number beginning with 34, expected at offset 0 without the
    # trailing space.
    sample = "3401 1825 3784 804 "
    expected = [(0, "3401 1825 3784 804")]
    self.assertEqual(panscan(sample), expected)
def test_invalid_mastercard(self):
    # An unspaced 16-digit number with nothing after it; panscan is
    # expected to reject it.
    sample = "8293396262570410"
    found = panscan(sample)
    self.assertEqual(found, [])
def test_valid_mastercard(self):
    # An unspaced 16-digit number beginning with 51, expected at offset 0
    # without the terminating newline.
    sample = "5162702975322904\n"
    expected = [(0, "5162702975322904")]
    self.assertEqual(panscan(sample), expected)
def test_invalid_jcb(self):
    # A 16-digit number in groups of four that panscan is expected to
    # reject.
    sample = "3096 6798 2217 1741 "
    found = panscan(sample)
    self.assertEqual(found, [])
def test_valid_amex37(self):
    # A 15-digit number beginning with 37, expected at offset 0 without the
    # trailing space.
    sample = "3730 4625 0745 132 "
    expected = [(0, "3730 4625 0745 132")]
    self.assertEqual(panscan(sample), expected)
def test_invalid_amex37(self):
    # A 15-digit number beginning with 37 that panscan is expected to
    # reject.
    sample = "3730 4625 0745 133 "
    found = panscan(sample)
    self.assertEqual(found, [])
def test_valid_diners(self):
    # A 14-digit number beginning with 30, expected at offset 0 without the
    # trailing space.
    sample = "3001 7970 1170 97 "
    expected = [(0, "3001 7970 1170 97")]
    self.assertEqual(panscan(sample), expected)
def test_invalid_diners(self):
    # A 14-digit number beginning with 30 that panscan is expected to
    # reject.
    sample = "3001 7970 1170 99 "
    found = panscan(sample)
    self.assertEqual(found, [])