class EmailToOrders:
  """Maps email UIDs to the orders parsed out of each email.

  The mapping is loaded through an ObjectRetriever at construction time and is
  written back through flush() after a previously unseen email is parsed.
  """

  def __init__(self, config):
    self.retriever = ObjectRetriever(config)
    self.email_to_orders: Dict[str, List[Order]] = self.retriever.load(EMAIL_TO_ORDERS_FILENAME)

  @debounce(1)
  def flush(self):
    """Write the email -> orders mapping back through the retriever.

    NOTE(review): wrapped in debounce(1); presumably this rate-limits writes --
    confirm against the debounce helper.
    """
    self.retriever.flush(self.email_to_orders, EMAIL_TO_ORDERS_FILENAME)

  def get_orders(self, mail, email_id):
    """Return the orders referenced by the email with UID `email_id`.

    On a cache miss the message is fetched over `mail` (an object exposing an
    imaplib-style `uid` method), its Date and To headers are read, the order IDs
    are extracted with AmazonTrackingRetriever, and the result is cached and
    flushed.

    Args:
      mail: connection used to FETCH the message.
      email_id: UID of the message, also the cache key.

    Returns:
      The list of Order objects stored for `email_id`.

    Raises:
      ValueError: if the Date header does not match the expected format.
    """
    if email_id not in self.email_to_orders:
      _, data = mail.uid("FETCH", email_id, "(RFC822)")
      raw_bytes = data[0][1]
      # Parse straight from bytes: decoding the whole message as UTF-8 first
      # raises UnicodeDecodeError for messages carrying other 8-bit content.
      msg = email.message_from_bytes(raw_bytes)
      date = datetime.datetime.strptime(msg['Date'],
                                        '%a, %d %b %Y %H:%M:%S %z').strftime('%Y-%m-%d')
      to_email = str(msg['To']).replace('<', '').replace('>', '')
      # str() of the bytes payload is its repr, so line breaks show up as the
      # literal characters backslash-r backslash-n; strip those along with
      # quoted-printable soft line breaks and escaped '=' / '&'.
      raw_email = (str(raw_bytes)
                   .replace("=3D", "=")
                   .replace('=\\r\\n', '')
                   .replace('\\r\\n', '')
                   .replace('&amp;', '&'))
      # The retriever class itself is passed in the `self` position.
      order_ids = AmazonTrackingRetriever.get_order_ids_from_email(AmazonTrackingRetriever,
                                                                   raw_email)
      self.email_to_orders[email_id] = [
          Order(order_id, date, to_email, False) for order_id in order_ids
      ]
      self.flush()
    return self.email_to_orders[email_id]
class NonPortalReimbursements:
    """Holds the non-portal reimbursement data loaded through an ObjectRetriever.

    Two mappings are kept: `trackings_to_costs` (loaded from
    NON_PORTAL_TRACKINGS_FILENAME) and `po_to_cost` (loaded from
    NON_PORTAL_POS_FILENAME).
    """

    def __init__(self, config):
        retriever = ObjectRetriever(config)
        self.retriever = retriever
        self.trackings_to_costs: Dict[Tuple[str], Tuple[
            str, float]] = retriever.load(NON_PORTAL_TRACKINGS_FILENAME)
        self.po_to_cost: Dict[str, float] = retriever.load(
            NON_PORTAL_POS_FILENAME)

    def flush(self):
        """Write both mappings back through the retriever, trackings first."""
        pending = (
            (self.trackings_to_costs, NON_PORTAL_TRACKINGS_FILENAME),
            (self.po_to_cost, NON_PORTAL_POS_FILENAME),
        )
        for payload, filename in pending:
            self.retriever.flush(payload, filename)
class ArchiveManager:
  """Stores named archives, each a (trackings_info, po_cost) pair.

  The archive mapping is loaded through an ObjectRetriever on construction and
  written back every time an archive is stored.
  """

  def __init__(self, config):
    retriever = ObjectRetriever(config)
    self.retriever = retriever
    self.archive_dict = retriever.load(ARCHIVES_FILENAME)

  def get_archive(self, name):
    """Return the archive stored under `name`; raises KeyError if absent."""
    archive = self.archive_dict[name]
    return archive

  def has_archive(self, name):
    """Return True when an archive is stored under `name`."""
    archives = self.archive_dict
    return name in archives

  def put_archive(self, name, trackings_info, po_cost) -> None:
    """Store (trackings_info, po_cost) under `name` and persist the mapping."""
    archives = self.archive_dict
    archives[name] = (trackings_info, po_cost)
    self.retriever.flush(archives, ARCHIVES_FILENAME)
class TrackingOutput:
    """Reads and writes the persisted collection of Tracking objects.

    Trackings are stored through an ObjectRetriever under TRACKINGS_FILENAME as
    a mapping of group -> list of trackings. Tracking numbers are upper-cased
    every time the stored data is loaded, so lookups and merges in this class
    compare tracking numbers case-insensitively to stay consistent with that.
    """

    def __init__(self, config) -> None:
        self.retriever = ObjectRetriever(config)

    @staticmethod
    def _normalize(tracking_number):
        """Return the upper-cased form of a tracking number (non-strings pass through)."""
        if isinstance(tracking_number, str):
            return tracking_number.upper()
        return tracking_number

    def save_trackings(self, trackings, overwrite=False) -> None:
        """Merge `trackings` into the stored trackings and persist the result.

        Args:
            trackings: new Tracking objects to store.
            overwrite: when True, a new tracking replaces a stored one with the
                same tracking number; otherwise the stored one is kept.
        """
        old_trackings = self.get_existing_trackings()
        merged_trackings = self.merge_trackings(old_trackings, trackings,
                                                overwrite)
        self._write_merged(merged_trackings)

    def get_tracking(self, tracking_number) -> Optional[Tracking]:
        """Returns the tracking object with the given tracking number if it exists."""
        # Stored numbers are upper-cased on load, so normalize the query too;
        # otherwise a lower- or mixed-case query could never match.
        wanted = self._normalize(tracking_number)
        for tracking in self.get_existing_trackings():
            if tracking.tracking_number == wanted:
                return tracking
        return None

    def _write_merged(self, merged_trackings) -> None:
        """Group the trackings by their `group` attribute and persist them."""
        groups_dict = collections.defaultdict(list)
        for tracking in merged_trackings:
            groups_dict[tracking.group].append(tracking)

        self.retriever.flush(groups_dict, TRACKINGS_FILENAME)

    def merge_trackings(self, old_trackings: List[Tracking],
                        trackings: List[Tracking],
                        overwrite: bool) -> List[Tracking]:
        """Combine stored and new trackings, one entry per tracking number.

        A new tracking is added when its tracking number is not present yet, or
        replaces the existing entry when `overwrite` is True. Tracking numbers
        are compared case-insensitively so that a new tracking differing only
        by case is not stored as a duplicate (it would collide with the
        existing entry once both are upper-cased on the next load).

        Returns:
            The merged trackings, existing entries first in their original
            order, followed by newly added ones.
        """
        merged = {
            self._normalize(t.tracking_number): t
            for t in old_trackings
        }
        for tracking in trackings:
            key = self._normalize(tracking.tracking_number)
            if overwrite or key not in merged:
                merged[key] = tracking
        return list(merged.values())

    def get_existing_trackings(self) -> List[Tracking]:
        """Load the stored trackings and return them as one flat list."""
        trackings_dict = self.retriever.load(TRACKINGS_FILENAME)
        return self._convert_to_list(trackings_dict)

    def _convert_to_list(self, trackings_dict):
        """Flatten a group -> trackings mapping into a single list.

        Each tracking's `tracking_number` is upper-cased in place.
        """
        result = []
        for trackings in trackings_dict.values():
            result.extend(trackings)
        for tracking in result:
            tracking.tracking_number = tracking.tracking_number.upper()
        return result
class EmailToOrders:
    """Maps email UIDs to the orders parsed out of each email.

    The mapping is loaded through an ObjectRetriever at construction time and
    is written back through flush() after a previously unseen email is parsed.
    """

    def __init__(self, config):
        self.retriever = ObjectRetriever(config)
        self.email_to_orders: Dict[str, List[Order]] = self.retriever.load(
            EMAIL_TO_ORDERS_FILENAME)

    @debounce(1)
    def flush(self):
        """Write the email -> orders mapping back through the retriever.

        NOTE(review): wrapped in debounce(1); presumably this rate-limits
        writes -- confirm against the debounce helper.
        """
        self.retriever.flush(self.email_to_orders, EMAIL_TO_ORDERS_FILENAME)

    def get_orders(self, mail, email_id):
        """Return the orders referenced by the email with UID `email_id`.

        On a cache miss the message is fetched over `mail`, with up to three
        attempts; after a failed attempt a fresh connection is obtained from
        email_auth and the "[Gmail]/All Mail" mailbox is selected again. The
        Date and To headers are read, order IDs are extracted with
        AmazonTrackingRetriever, and the result is cached and flushed.

        Args:
            mail: connection exposing an imaplib-style `uid` method.
            email_id: UID of the message, also the cache key.

        Returns:
            The list of Order objects stored for `email_id`.

        Raises:
            Exception: if all three fetch attempts fail (chained to the last
                failure).
            ValueError: if the Date header does not match the expected format.
        """
        if email_id not in self.email_to_orders:
            last_error = None
            for _attempt in range(3):
                try:
                    _, data = mail.uid("FETCH", email_id, "(RFC822)")
                    break
                # Catch Exception rather than everything, so Ctrl-C and
                # interpreter shutdown are not turned into retries.
                except Exception as e:
                    last_error = e
                    tqdm.write(
                        f"Got exception, attempting retry up to 3 times...\n{util.get_traceback_lines()}"
                    )
                    mail = email_auth.email_authentication()
                    mail.select('"[Gmail]/All Mail"')
            else:
                raise Exception("Exceeded retry limit") from last_error

            raw_bytes = data[0][1]
            # Parse straight from bytes: decoding the whole message as UTF-8
            # first raises UnicodeDecodeError for other 8-bit content.
            msg = email.message_from_bytes(raw_bytes)
            date = datetime.datetime.strptime(
                msg['Date'], '%a, %d %b %Y %H:%M:%S %z').strftime('%Y-%m-%d')
            to_email = str(msg['To']).replace('<', '').replace('>', '')
            # str() of the bytes payload is its repr, so line breaks show up as
            # the literal characters backslash-r backslash-n.
            raw_email = str(raw_bytes).replace("=3D", "=").replace(
                '=\\r\\n', '').replace('\\r\\n', '').replace('&amp;', '&')
            # The retriever class itself is passed in the `self` position.
            order_ids = AmazonTrackingRetriever.get_order_ids_from_email(
                AmazonTrackingRetriever, raw_email)
            self.email_to_orders[email_id] = [
                Order(order_id, date, to_email, False)
                for order_id in order_ids
            ]
            self.flush()
        return self.email_to_orders[email_id]
Exemple #6
0
class OrderInfoRetriever:
  """
  A class that parses and stores the order numbers and email IDs for shipments.

  Order totals are looked up in order-confirmation emails over an IMAP
  connection and cached in `orders_dict` (order ID -> OrderInfo), which is
  loaded from and flushed to ORDERS_FILENAME through an ObjectRetriever.
  """

  def __init__(self, config) -> None:
    self.retriever = ObjectRetriever(config)
    self.orders_dict = self.retriever.load(ORDERS_FILENAME)
    self.mail = self.load_mail()

  def load_mail(self):
    """Authenticate and select the "[Google Mail]/All Mail" mailbox."""
    mail = email_auth.email_authentication()
    mail.select('"[Google Mail]/All Mail"')
    return mail

  @debounce(5)
  def flush(self) -> None:
    """Write the cached orders back through the retriever.

    NOTE(review): wrapped in debounce(5); presumably this rate-limits writes --
    confirm against the debounce helper.
    """
    self.retriever.flush(self.orders_dict, ORDERS_FILENAME)

  def get_order_info(self, order_id, fetch_from_email: bool = True) -> OrderInfo:
    """Return the OrderInfo for `order_id`, fetching it from email if needed.

    Args:
      order_id: the order to look up.
      fetch_from_email: when True, an order previously stored with a cost of 0
        or MISSING_COST_SENTINEL is looked up again.

    Returns:
      The cached OrderInfo; its cost is MISSING_COST_SENTINEL when no total
      could be determined for this order.
    """
    # Always fetch if we've never seen this order before, additionally fetch iff
    # we found a 0 or MISSING_COST_SENTINEL cost before and we want to retry.
    needs_fetch = order_id not in self.orders_dict
    if not needs_fetch and fetch_from_email:
      known_cost = self.orders_dict[order_id].cost
      needs_fetch = known_cost == 0 or isclose(known_cost, MISSING_COST_SENTINEL)

    if needs_fetch:
      from_email = self.load_order_total(order_id)
      if not from_email:
        from_email = {order_id: OrderInfo(None, MISSING_COST_SENTINEL)}
      self.orders_dict.update(from_email)
      # The email may yield totals for other orders only (e.g. fewer totals
      # than order numbers); record the sentinel instead of raising KeyError.
      if order_id not in self.orders_dict:
        self.orders_dict[order_id] = OrderInfo(None, MISSING_COST_SENTINEL)
      self.flush()
    return self.orders_dict[order_id]

  def load_order_total(self, order_id: str) -> Dict[str, OrderInfo]:
    """Dispatch to the Best Buy parser for "BBY01" order IDs, else to Amazon."""
    if order_id.startswith("BBY01"):
      return self.load_order_total_bb(order_id)
    return self.load_order_total_amazon(order_id)

  def load_order_total_bb(self, order_id: str) -> Dict[str, OrderInfo]:
    """Parse subtotal + tax for a Best Buy order from its email.

    Returns:
      {order_id: OrderInfo(email_id, subtotal + tax)}, or {} when the email or
      either amount cannot be found.
    """
    from_email = '*****@*****.**'
    email_id, email_str = self.get_relevant_raw_email_data(order_id, from_email)
    if not email_str:
      # Use tqdm.write like the Amazon path so progress bars are not garbled.
      tqdm.write("Could not find email for order ID %s" % order_id)
      return {}

    regex_subtotal = r'Subtotal[^\$]*\$([\d,]+\.[\d]{2})'
    regex_tax = r'Tax[^\$]*\$([\d,]+\.[\d]{2})'
    subtotal_match = re.search(regex_subtotal, email_str)
    if not subtotal_match:
      return {}
    tax_match = re.search(regex_tax, email_str)
    if not tax_match:
      return {}
    subtotal = float(subtotal_match.group(1).replace(',', ''))
    tax = float(tax_match.group(1).replace(',', ''))
    return {order_id: OrderInfo(email_id, subtotal + tax)}

  def load_order_total_amazon(self, order_id: str) -> Dict[str, OrderInfo]:
    """Parse per-order totals from the Amazon email mentioning `order_id`.

    Every distinct order number found in the email is paired, in order of
    appearance, with the totals found in the email. Three formats are tried in
    turn: "Total Before Tax" + "Estimated Tax" pairs, the personal-account HTML
    layout, and finally plain "Order Total" amounts.

    Returns:
      A mapping of order ID -> OrderInfo; {} when no email or totals are found.
    """
    email_id, email_str = self.get_relevant_raw_email_data(order_id, '*****@*****.**')
    if not email_str:
      tqdm.write(f"Could not find email for order ID {order_id}.")
      return {}

    regex_pretax = r'Total Before Tax:[^$]*\$([\d,]+\.\d{2})'
    regex_est_tax = r'Estimated Tax:[^$]*\$([\d,]+\.\d{2})'
    regex_order_total = r'Order Total:[^$]*\$([\d,]+\.\d{2})'
    regex_order = r'(\d{3}-\d{7}-\d{7})'

    # De-duplicate while keeping first-appearance order.
    orders = list(dict.fromkeys(re.findall(regex_order, email_str)))

    # Sometimes it's been split into multiple orders. Find totals for each
    pretax_totals = [float(cost.replace(',', '')) for cost in re.findall(regex_pretax, email_str)]

    if pretax_totals:
      taxes = [float(cost.replace(',', '')) for cost in re.findall(regex_est_tax, email_str)]
      order_infos = [
          OrderInfo(email_id, pretax + tax) for pretax, tax in zip(pretax_totals, taxes)
      ]
      return dict(zip(orders, order_infos))

    # personal emails might not have the regexes, need to do something different
    personal_result = self.get_personal_amazon_totals(email_id, email_str, orders)
    if personal_result:
      return personal_result

    # amazon sometimes uses a new, odd format that only shows a single order total
    overall_totals = [
        float(cost.replace(',', '')) for cost in re.findall(regex_order_total, email_str)
    ]
    order_infos = [OrderInfo(email_id, total) for total in overall_totals]
    return dict(zip(orders, order_infos))

  def get_relevant_raw_email_data(self, order_id: str, from_email: str) -> Tuple[Optional[str], Optional[str]]:
    """Find the first email from `from_email` whose body mentions `order_id`.

    Returns:
      (email UID, email content with "\\r\\n" sequences removed), or
      (None, None) when the search returns no UIDs.
    """
    _, search_result = self.mail.uid('SEARCH', None, f'BODY "{order_id}"',
                                     f'FROM "{from_email}"')
    raw_ids = search_result[0]
    if not raw_ids:
      return None, None

    email_ids = raw_ids.decode('utf-8').split()
    if not email_ids:
      return None, None

    first_id = email_ids[0]
    email_str = email_tracking_retriever.get_email_content(first_id, self.mail)
    return first_id, email_str.replace('\r\n', '')

  def get_personal_amazon_totals(self, email_id, email_str, orders) -> Dict[str, OrderInfo]:
    """Parse totals from the personal-account email layout.

    Amounts are read from every <td class="price"> cell; empty cells are
    skipped.

    Returns:
      A mapping of order ID -> OrderInfo pairing `orders`, in order, with each
      consecutive (pretax, tax) pair of amounts. Surplus amounts or surplus
      orders are ignored.
    """
    soup = BeautifulSoup(email_str, features="html.parser")
    price_texts = [
        elem.getText().strip().replace(',', '').replace('$', '')
        for elem in soup.find_all('td', {"class": "price"})
    ]
    prices = [float(text) for text in price_texts if text]

    # prices alternate between pretax / tax. zip() stops at the shortest input,
    # so extra price pairs no longer index past the end of `orders`.
    return {
        order: OrderInfo(email_id, pretax + tax)
        for order, pretax, tax in zip(orders, prices[0::2], prices[1::2])
    }
Exemple #7
0
class CancelledItemsRetriever:
    """Collects cancelled items per order from cancellation emails.

    Parsed results are cached per email UID in `email_id_dict`
    ({email_id: {order_id: cancelled_items}}), which is loaded from and flushed
    to CANCELLATIONS_FILENAME through an ObjectRetriever.
    """

    def __init__(self, config):
        self.retriever = ObjectRetriever(config)
        # map of {email_id: {order_id: cancelled_items}}
        self.email_id_dict = self.retriever.load(CANCELLATIONS_FILENAME)

    def get_cancelled_items(self) -> Dict[str, List[str]]:
        """Return a mapping of order ID -> list of cancelled item descriptions.

        Emails not seen before are fetched and parsed; emails that yield no
        result (no order number found, or a parsing error) are skipped and not
        cached.
        """
        mail = self.load_mail()
        all_email_ids = self.get_all_email_ids(mail)

        result = {}
        for email_id, canc_info in tqdm(all_email_ids.items(),
                                        desc="Fetching cancellations",
                                        unit="email"):
            if email_id not in self.email_id_dict:
                email_result = self.get_cancellations_from_email(
                    mail, email_id, canc_info)
                if not email_result:
                    continue
                self.email_id_dict[email_id] = email_result
                self.flush()

            for order_id, items in self.email_id_dict[email_id].items():
                result.setdefault(order_id, []).extend(items)

        return result

    @retry(stop=stop_after_attempt(4),
           wait=wait_exponential(multiplier=1, min=2, max=120))
    def get_all_email_ids(self, mail) -> Dict[str, Tuple[CancFmt, CancQty]]:
        """Search the mailbox for every known cancellation subject line.

        Returns:
            A mapping of email UID -> (format, has-quantity) info for the
            subject pattern that matched. When a UID matches several patterns,
            the last matching pattern in the table below wins.
        """
        # Each key is a tuple of phrases that must all appear in the subject.
        subject_searches = {
            ('Your Amazon.com order', 'has been canceled'):
            (CancFmt.IRRELEVANT, CancQty.NO),
            ('Your Amazon.com Order', 'Has Been Canceled'):
            (CancFmt.IRRELEVANT, CancQty.NO),
            ('Your Amazon.com Order', 'Has Been Cancelled'):
            (CancFmt.IRRELEVANT, CancQty.NO),
            ('Your AmazonSmile order', 'has been canceled'):
            (CancFmt.IRRELEVANT, CancQty.NO),
            ('Your AmazonSmile order', 'has been cancelled'):
            (CancFmt.IRRELEVANT, CancQty.NO),
            ('Item canceled for your Amazon.com order', ): (CancFmt.IRRELEVANT,
                                                            CancQty.NO),
            (
                "Successful cancellation of",
                "from your AmazonSmile order",
            ): (CancFmt.VOLUNTARY, CancQty.YES),
            (
                "Successful cancellation of",
                "from your Amazon.com order",
            ): (CancFmt.VOLUNTARY, CancQty.YES),
            ("Partial item(s) cancellation from your Amazon.com order", ):
            (CancFmt.VOLUNTARY, CancQty.NO),
            ("item has been canceled from your AmazonSmile order", ):
            (CancFmt.INVOLUNTARY, CancQty.NO),
            ("items have been canceled from your AmazonSmile order", ):
            (CancFmt.INVOLUNTARY, CancQty.NO),
            ("items have been canceled from your Amazon.com order", ):
            (CancFmt.INVOLUNTARY, CancQty.NO),
            ("item has been canceled from your Amazon.com order", ):
            (CancFmt.INVOLUNTARY, CancQty.NO)
        }
        result_ids = {}
        for phrases, canc_info in subject_searches.items():
            criteria = [f'(SUBJECT "{phrase}")' for phrase in phrases]
            _, response = mail.uid('SEARCH', None, *criteria)
            for email_id in response[0].decode('utf-8').split():
                result_ids[email_id] = canc_info
        return result_ids

    @retry(stop=stop_after_attempt(4),
           wait=wait_exponential(multiplier=1, min=2, max=120))
    def get_cancellations_from_email(
            self, mail, email_id: str,
            canc_info: Tuple[CancFmt, CancQty]) -> Dict[str, List[str]]:
        """Parse the cancelled items out of a single cancellation email.

        Args:
            mail: connection exposing an imaplib-style `uid` method.
            email_id: UID of the email to fetch.
            canc_info: (format, has-quantity) pair describing the email layout.

        Returns:
            {order_id: [item, ...]} for the first order number found in the
            email ({order_id: []} for CancFmt.IRRELEVANT emails), {} when the
            email contains no order number, or None when parsing fails (the
            error is printed and processing continues).

        Raises:
            Exception: if the email cannot be fetched; because of the retry
                decorator the fetch is attempted again before giving up.
        """
        try:
            _, data = mail.uid("FETCH", email_id, "(RFC822)")
        except Exception as e:
            raise Exception(f"Error retrieving email UID {email_id}") from e
        try:
            raw_email = data[0][1]
            orders = re.findall(r"(\d{3}-\d{7}-\d{7})", str(raw_email))
            if not orders:
                return {}
            order = orders[0]

            soup = BeautifulSoup(quopri.decodestring(raw_email),
                                 features="html.parser",
                                 from_encoding="iso-8859-1")

            if canc_info[0] == CancFmt.VOLUNTARY:
                cancelled_header = soup.find("h3", text="Canceled Items")
            elif canc_info[0] == CancFmt.INVOLUNTARY:
                cancelled_header = soup.find("span", text="Canceled Items")
            elif canc_info[0] == CancFmt.IRRELEVANT:
                return {order: []}
            else:
                raise Exception(
                    f"Can't handle cancellation format {canc_info[0]}")
            # A missing header leaves cancelled_header as None; the resulting
            # AttributeError is reported by the handler below.
            parent = cancelled_header.parent.parent.parent
            cancelled_items = []
            for li in parent.find_all('li'):
                # Each li contains a single link whose link text is the item name.
                canc_item = li.find('a').text.strip()
                # If cancellation email format contains quantity info, then use the string from
                # Amazon as-is, otherwise prepend with "??" to indicate indeterminate quantity.
                cancelled_items.append(canc_item if canc_info[1] ==
                                       CancQty.YES else f"?? {canc_item}")
            return {order: cancelled_items}
        except Exception as e:
            # Parse from bytes: decoding the whole message as UTF-8 here could
            # itself raise and hide the error being reported.
            msg = email.message_from_bytes(data[0][1])
            print(
                f"Received exception with message '{str(e)}' when processing cancellation email with subject {msg['Subject']}:"
            )
            traceback.print_exc(file=sys.stdout)
            print("Continuing...")
            return None

    @retry(stop=stop_after_attempt(4),
           wait=wait_exponential(multiplier=1, min=2, max=120))
    def load_mail(self):
        """Authenticate and select the "[Gmail]/All Mail" mailbox."""
        mail = email_auth.email_authentication()
        mail.select('"[Gmail]/All Mail"')
        return mail

    @retry(stop=stop_after_attempt(4),
           wait=wait_exponential(multiplier=1, min=2, max=120))
    @debounce(2)
    def flush(self) -> None:
        """Write the per-email cancellation cache back through the retriever.

        NOTE(review): wrapped in debounce(2); presumably this rate-limits
        writes -- confirm against the debounce helper.
        """
        self.retriever.flush(self.email_id_dict, CANCELLATIONS_FILENAME)