Esempio n. 1
0
 def carrier(x):
     """Return a carrier slug for the tracking number ``x``.

     The guess from ``tracking_url.guess_carrier`` is accepted only when it
     names one of ``'usps'``, ``'ups'`` or ``'fedex'``.  Otherwise a number
     whose first three characters are ``'TBA'`` is reported as ``'amazon'``,
     and everything else as ``'unknown'``.
     """
     guess = tracking_url.guess_carrier(x)
     # Happy path first: a truthy guess for one of the supported carriers.
     if guess and guess.carrier in ['usps', 'ups', 'fedex']:
         return guess.carrier
     return 'amazon' if 'TBA' == x[:3] else 'unknown'
Esempio n. 2
0
 def check_carrier(self, expected_carrier, tracking_numbers):
     """Assert that each tracking number is guessed as ``expected_carrier``.

     For every entry in ``tracking_numbers`` the result of ``guess_carrier``
     must be non-None and its ``carrier`` attribute must equal
     ``expected_carrier``; the failure message names the offending number.
     """
     for number in tracking_numbers:
         guess = guess_carrier(number)

         unmatched_msg = '`{}` did not match {}'.format(
             number, expected_carrier)
         self.assertIsNotNone(guess, msg=unmatched_msg)

         # Only reached when a guess exists, so ``guess.carrier`` is safe.
         mismatch_msg = '`{}` matched {} instead of {}'.format(
             number, guess.carrier, expected_carrier)
         self.assertEqual(expected_carrier, guess.carrier, msg=mismatch_msg)
 def nlp(self, line):
     """Extract candidate tracking numbers from free text.

     The text is upper-cased, split on newlines, then tokenized and
     POS-tagged with NLTK.  Tokens tagged ``CD`` and tokens that look
     alphanumeric are collected (each glued to any directly following
     tokens that start with a digit), purely alphabetic tokens are
     discarded, and only candidates longer than 8 characters are kept.

     Args:
         line: Text to scan; may contain several newline-separated lines.

     Returns:
         A de-duplicated list (in no particular order) of candidates that
         either match one of the regular expressions in the ``pass`` column
         of ``data/regx.xlsx`` or are recognized by
         ``tracking_url.guess_carrier``.
     """
     token_re = re.compile("^[a-zA-Z0-9]+(?:_[a-zA-Z0-9]+)?$")
     spaced_re = re.compile(
         '^[a-zA-Z0-9]{4}[ ]{1}[a-zA-Z0-9]{4}[ ]{1}[a-zA-Z0-9]{4}')

     def absorb_digits(tagged, start):
         """Join token ``start`` with the digit-led tokens that follow it."""
         joined = tagged[start][0]
         nxt = start + 1
         # Bound by len(tagged): the original compared against an undefined
         # name (``len1``), which raised NameError on the first look-ahead.
         while nxt < len(tagged) and '0' <= tagged[nxt][0][0] <= '9':
             joined += tagged[nxt][0]
             nxt += 1
         return joined

     cardinal = []
     alpha = []
     alphanum = []
     for text_line in line.upper().split('\n'):
         tagged = nltk.pos_tag(word_tokenize(text_line))
         for idx, (token, tag) in enumerate(tagged):
             if tag == 'CD':
                 cardinal.append(absorb_digits(tagged, idx))
             elif token_re.match(token):
                 alphanum.append(absorb_digits(tagged, idx))
             if token.isalpha():
                 alpha.append(token)
             if spaced_re.match(token):
                 alphanum.append(token.replace(" ", ""))

     # Alphanumeric candidates minus plain words, plus every cardinal run;
     # only strings longer than 8 characters are considered further.
     pool = (set(alphanum) - set(alpha)) | set(cardinal)
     candidates = [item for item in pool if len(item) > 8]

     # ``sheet_name`` is the current pandas keyword; the older ``sheetname``
     # spelling is rejected by recent pandas releases.
     patterns = pd.read_excel('data/regx.xlsx', sheet_name=0)['pass'].tolist()

     valid = set()
     for candidate in candidates:
         if any(re.match(pattern, candidate) for pattern in patterns):
             valid.add(candidate)
         elif tracking_url.guess_carrier(candidate) is not None:
             valid.add(candidate)
     return list(valid)
 def check_carrier(self, expected_carrier, tracking_numbers):
     """Assert that ``guess_carrier`` maps every number to ``expected_carrier``.

     A missing guess fails with the tracking number as the message; a guess
     for a different carrier fails the equality assertion.
     """
     for number in tracking_numbers:
         guess = guess_carrier(number)
         # The number itself is passed as the failure message.
         self.assertIsNotNone(guess, number)
         guessed_carrier = guess.carrier
         self.assertEqual(expected_carrier, guessed_carrier)