def get_zd_updates(start_time):
    """Pull Zendesk's incremental ticket-event export starting at *start_time*.

    Follows ``next_page`` cursors until ``end_of_stream`` is reported,
    accumulating every ticket event (comment events included).

    :param start_time: epoch timestamp to start the incremental export from
    :return: ``(end_time, ticket_events)`` where *end_time* is the cursor for
        the next poll (falls back to *start_time* when Zendesk reports None)
    :raises AssertionError: on any non-200 response from the Zendesk API
    """
    url_template = 'https://archivesupport.zendesk.com/api/v2/incremental/ticket_events.json?start_time={}&include=comment_events'
    next_page = url_template.format(start_time)
    session = requests.session()
    ticket_events = []
    while True:
        logger.info(
            'getting incremental ticket updates from Zendesk since {}'.format(
                start_time))
        response = session.get(next_page, auth=HTTPBasicAuth(
            env['ZENDESK_AGENT_ACCOUNT'] + "/token", env['ZENDESK_API_KEY']))
        # Explicit raise instead of 'assert' so the check survives `python -O`
        # (assert statements are stripped under optimization). Same
        # AssertionError type the original produced, so callers are unaffected.
        if response.status_code != 200:
            raise AssertionError("{}: {}".format(
                response.status_code, response.content))
        data = response.json()
        ticket_events.extend(data['ticket_events'])
        if data['end_of_stream']:
            break
        next_page = data['next_page']
        start_time = data['end_time']
    logger.info('found {} zendesk updates'.format(len(ticket_events)))
    # Zendesk may report end_time as None on an empty stream; reuse the
    # caller's cursor in that case so the next poll doesn't go back to 0.
    end_time = data['end_time']
    if end_time is None:
        end_time = start_time
    return end_time, ticket_events
def get_raw_mail(unseen=True, read_only=False, config=None):
    """Fetch raw message data (BODY[] and ENVELOPE) from an IMAP mailbox.

    :param unseen: when True fetch only UNSEEN messages, otherwise ALL
    :param read_only: currently unused here; kept for interface compatibility
    :param config: dict with host/port/user/pass/folder keys; defaults to a
        fresh ``get_default_imap_config()`` per call (the previous def-time
        default was evaluated once at import and could go stale)
    :return: dict mapping message id -> fetched data
    """
    if config is None:
        config = get_default_imap_config()
    host = config['host']
    port = config['port']
    user = config['user']
    password = config['pass']
    folder = config['folder']
    # NOTE: the original format arguments were inverted — it logged
    # 'new' when fetching ALL mail and nothing when fetching UNSEEN.
    logger.info('getting {}mail from {}'.format('new ' if unseen else '', user))
    try:
        logger.debug('establishing connection to {}:{}'.format(host, port))
        server = imapclient.IMAPClient(host=host, port=port)
        logger.debug('logging in as ' + user)
        server.login(user, password)
        logger.debug('selecting {}'.format(folder))
        server.select_folder(folder)
        logger.debug('polling for mail')
        msg_ids = server.search('UNSEEN' if unseen else 'ALL')
        logger.debug("{} {}emails found".format(len(msg_ids),
                                                'new ' if unseen else ''))
        logger.debug('fetching msg data')
        msg_data = {}
        # fetch in chunks so a huge mailbox doesn't blow a single command
        for msg_ids_chunk in chunked(msg_ids, 1000):
            msg_data.update(server.fetch(msg_ids_chunk, ['BODY[]', 'ENVELOPE']))
        server.logout()
        return msg_data
    except Exception as e:
        logger.critical(e)
        # bare raise preserves the original traceback
        raise
def get_msgs(folders):
    """Log in to the diffbot IMAP account and fetch every message in *folders*.

    For each folder, fetches BODY[TEXT], ENVELOPE and INTERNALDATE for all
    messages (in chunks of 1000).

    :param folders: iterable of IMAP folder names
    :return: dict mapping folder name -> {msg_id: fetched data}
    :raises Exception: re-raises anything the IMAP client throws, after
        logging it at critical level
    """
    try:
        logger.debug('establishing connection to {}:{}'.format(IMAP_SERVER,
                                                               IMAP_PORT))
        server = imapclient.IMAPClient(host=IMAP_SERVER, port=IMAP_PORT)
        logger.debug('logging in as ' + env['DIFFBOT_ADDRESS'])
        server.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
        data = {}
        for folder in folders:
            logger.debug('selecting {}'.format(folder))
            server.select_folder(folder)
            logger.debug('polling for mail')
            ids = server.search('ALL')
            logger.info("{} emails found".format(len(ids)))
            logger.debug('fetching msg data')
            fetched = {}
            for chunk in chunked(ids, 1000):
                logger.info('getting msg data for ids [{}... {}]'.format(
                    chunk[0], chunk[-1]))
                fetched.update(server.fetch(
                    chunk, ['BODY[TEXT]', 'ENVELOPE', 'INTERNALDATE']))
            data[folder] = fetched
        server.logout()
        return data
    except Exception as e:
        logger.critical(e)
        raise e
def send_mail(sender, receiver, subject, body, html_body=None, cc=None, config=None):
    """Send a plain-text (optionally multipart/alternative HTML) email.

    :param sender: value for the From: header
    :param receiver: To: header and primary envelope recipient
    :param subject: Subject: header
    :param body: plain-text body
    :param html_body: optional HTML alternative body
    :param cc: optional list of Cc addresses; they are now also added to the
        envelope recipients so they actually receive the message (the
        original only set the header)
    :param config: smtp config dict with host/port/user/pass; defaults to a
        fresh ``get_default_smtp_config()`` per call (the previous def-time
        default was evaluated once at import and could go stale)
    """
    if config is None:
        config = get_default_smtp_config()
    host = config['host']
    port = config['port']
    user = config['user']
    password = config['pass']
    logger.info('sending email from {} to {} over {}:{}'.format(
        sender, receiver, host, port))
    logger.debug('composing message headers')
    msg = MIMEMultipart('alternative')
    msg['From'] = sender
    msg['To'] = receiver
    recipients = [receiver]
    if isinstance(cc, list):
        msg['Cc'] = ', '.join(cc)
        recipients.extend(cc)  # Cc'd addresses must be envelope recipients too
    msg['Subject'] = subject
    # multipart/alternative parts go least- to most-preferred, so attach the
    # plain part first and the html part last. (The original attached html
    # first, making clients prefer the plain text even when html was given.)
    text_part = MIMEText(body, 'plain')
    msg.attach(text_part)
    if html_body is not None:
        html_part = MIMEText(html_body, 'html')
        msg.attach(html_part)
    try:
        logger.debug('establishing connection to {}:{}'.format(host, port))
        # context manager quits/closes the connection even on error
        # (the original never closed it)
        with smtplib.SMTP_SSL(host=host, port=port) as server:
            logger.debug('logging in as ' + user)
            server.login(user, password)
            logger.debug('sending')
            # envelope sender stays the authenticated account, as before
            server.sendmail(config['user'], recipients, msg.as_string())
    except Exception as e:
        logger.critical(e)
        raise
def og_diff_pools(dict1, dict2, sort_key, within_error, is_match):
    """Match items between two pools using a sorted two-pointer sweep.

    Both pools are sorted by *sort_key*; a sliding lower bound into the
    second pool limits the candidates tested for each item of the first.

    :param dict1: first pool of items
    :param dict2: second pool of items
    :param sort_key: function for sorting (key, value) items
    :param within_error: func whether items are within error margin w/respect
        to the sort attribute
    :param is_match: func whether items are a match
    :return: four tuple: matched/unmatched dicts from dict1/dict2
    """
    # decorate and sort
    sorted_a = sorted(dict1.items(), key=sort_key)
    sorted_b = sorted(dict2.items(), key=sort_key)
    lower = 0  # sliding lower-bound index into sorted_b
    a_matched = {}
    a_unmatched = {}
    b_matched = {}
    b_unmatched = {}
    for idx, item in enumerate(sorted_a):
        logger.info('Searching for match for item {}/{}'.format(idx, len(sorted_a)))
        key_a, val_a = item
        # advance the lower bound past candidates that are already behind us
        while lower < len(sorted_b):
            cand_key, cand_val = sorted_b[lower]
            # within the error window: stop advancing
            if within_error(val_a, cand_val):
                break
            # candidate is beyond our item: stop advancing
            if sort_key(item) < sort_key(sorted_b[lower]):
                break
            # anything skipped here that was never matched is unmatched
            if cand_key not in b_matched:
                b_unmatched[cand_key] = cand_val
            lower += 1
        pos = lower
        while True:
            # ran off the end of sorted_b: no match for this item
            if pos == len(sorted_b):
                a_unmatched[key_a] = val_a
                break
            cand_key, cand_val = sorted_b[pos]
            # already matched candidates are skipped
            if cand_key in b_matched:
                pos += 1
                continue
            # beyond the error window: no match for this item
            if not within_error(val_a, cand_val):
                a_unmatched[key_a] = val_a
                break
            # found a match
            if is_match(val_a, cand_val):
                a_matched[key_a] = val_a
                b_matched[cand_key] = cand_val
                break
            pos += 1
    return a_matched, a_unmatched, b_matched, b_unmatched
def consult():
    """Flask endpoint: email one or more consultants about a Zendesk ticket.

    Requires basic auth matching the configured trigger credentials.

    payload structure:
    {
        "ticket_id": Int,
        "consultant": "<consultant>@archive.org, ...",
        "subject": String,
        "body": String,
        "html_body": String,
    }
    :return: (json response, http status code)
    """
    logger.info("{} request from {}".format(request.method, request.origin))
    auth = request.authorization
    if auth is None:
        message = "Provide basic auth to use this service."
        logger.error(message)
        return jsonify({"Error": message}), 401
    if (auth['username'] != env['ZENDESK_TRIGGER_USERNAME']
            or auth['password'] != env['ZENDESK_TRIGGER_PASSWORD']):
        message = "Invalid Username/Password"
        logger.error(message)
        return jsonify({"Error": message}), 401
    try:
        # named 'payload' — the original local 'json' shadowed the json module name
        payload = request.get_json()
    except BadRequest:
        message = "Bad Request: Could not parse json object"
        logger.error(message)
        return jsonify({"Error": message}), 400
    if payload is None:
        # get_json() returns None (without raising) when the request has no
        # JSON content type; the original crashed on the key check below
        message = "Bad Request: Could not parse json object"
        logger.error(message)
        return jsonify({"Error": message}), 400
    # verify correct keys (single source of truth for the key list)
    required_keys = ['consultant', 'subject', 'body', 'html_body', 'ticket_id']
    if any(k not in payload for k in required_keys):
        logger.error("Invalid data keys")
        return jsonify({
            "Error": "Json object must contain the following non-optional keys",
            "keys": required_keys
        }), 400
    # send mail
    body = INTERNAL_MESSAGE_PLAIN + payload['body']
    html_body = INTERNAL_MESSAGE_HTML + payload['html_body']
    # literal backslash-n sequences from the JSON become real newlines
    body = body.replace('\\n', '\n')
    html_body = html_body.replace('\\n', '\n')
    for consultant in payload['consultant'].replace(' ', '').split(','):
        mail.send_mail(
            sender='{} <{}>'.format(MAILBOT_NAME, env['MAILBOT_ADDRESS']),
            receiver=consultant,
            subject=SUBJECT_PATTERN.format(payload['ticket_id'],
                                           payload['subject']),
            body=body,
            html_body=html_body,
            cc=['{} <{}>'.format(MAILBOT_CC_NAME, env['MAILBOT_CC_ADDRESS'])])
    return jsonify({"Success": "Consultant has been emailed"}), 200
def update_ticket(ticket_id, body, sender, public=True):
    """Post a comment to Zendesk ticket *ticket_id*.

    :param ticket_id: Zendesk ticket id
    :param body: comment text
    :param sender: (name, address) tuple; public comments are signed with the
        sender's first name only, private ones with "Name <address>"
    :param public: whether the comment is publicly visible
    :raises TicketUpdateException: when the Zendesk API does not return 200
    """
    import copy
    signed = sender[0].split(' ')[0] if public else "{} <{}>".format(*sender)
    comment = comment_template.format(body=body, signed=signed)
    # Deep-copy the shared template: the original aliased and mutated the
    # module-level dict in place, so successive/concurrent calls could see
    # each other's comment text.
    payload = copy.deepcopy(payload_template_dict)
    payload['ticket']['comment']['body'] = comment
    payload['ticket']['comment']['public'] = public
    response = post_ticket_update(ticket_id, payload)
    if response.status_code != 200:
        # response.content is bytes; the original 'str + bytes' concatenation
        # raised TypeError before the intended exception could be raised
        logger.info('ZD API: ' + response.content.decode('utf-8', 'replace'))
        raise TicketUpdateException()
def parse_emails_response(support_emails_response):
    """Turn raw IMAP fetch responses into [(timestamp, body, uid), ...].

    Plain-text bodies are preferred; html bodies are converted with
    html2text. Messages whose UID can't be parsed, that have no body, or
    whose body can't be decoded are skipped with an error log.
    """
    # these configurations match what we get from zendesk
    html2text.config.IGNORE_TABLES = True
    html2text.config.IGNORE_IMAGES = False
    h = html2text.HTML2Text()
    h.body_width = 0
    h.ignore_links = True
    # raw bytes literal: '\d' inside a plain literal is an invalid escape
    # sequence (a SyntaxWarning on modern Pythons)
    id_pattern = re.compile(rb".*UID (\d+) .*")
    # collect decorated messages [(timestamp, comment, id)...]
    support_decorated_comments = []
    logger.info('parsing archive support email data')
    for li in support_emails_response:
        # weird case – something isn't implemented properly in the libraries
        if li == b')':
            continue
        id_bytes, msg_bytes = li
        msg = BytesParser(policy=policy.default).parsebytes(msg_bytes)
        # get id; skip (rather than crash) responses with no recognizable UID
        id_match = re.match(id_pattern, id_bytes.strip())
        if id_match is None:
            logger.error('could not extract UID from fetch response')
            continue
        msg_id = int(id_match.group(1))
        # get time stamp from the last Received: header clause
        time_str = msg['Received'].split(';')[-1].strip()
        time_stamp = parser.parse(time_str).timestamp()
        # get message body, preferring plain text
        raw = msg.get_body(preferencelist=('plain', ))
        if raw is not None:
            try:
                body = raw.get_content()
            except LookupError as e:
                # unknown charset — skip this message
                logger.error(e)
                continue
        else:
            raw = msg.get_body(preferencelist=('html', ))
            if raw is None:
                logger.error('Found message with no plain or html body')
                continue
            try:
                html_content = raw.get_content()
            except LookupError as e:
                logger.error(e)
                continue
            body = h.handle(html_content)
        support_decorated_comments.append((time_stamp, body, msg_id))
    return support_decorated_comments
def run():
    """Main diffbot loop.

    Each cycle: pull incremental Zendesk ticket comments, pull the archive
    support mailbox, diff the two streams, persist/log the leftovers via
    cleanup(), then sleep for DIFFBOT_LOOP_WAIT_PERIOD seconds.
    """
    # NOTE: the original set a local `USE_TZ = True` claiming it made
    # datetime.now() non-naive; a bare local assignment has no such effect
    # (USE_TZ is a Django setting), so that dead line was removed.
    zd_still_fresh_filename = 'zd_still_fresh.pickle'
    # NOTE(review): still_fresh_zd_triples is loaded once and re-added every
    # cycle even though cleanup() persists a new set each round — confirm
    # whether it should be re-read from the pickle inside the loop.
    start_time, still_fresh_zd_triples = get_still_fresh_zd_triples(
        zd_still_fresh_filename)
    while True:
        logger.info('START CYCLE')
        end_time, ticket_events = get_zd_updates(start_time)
        start_time = end_time
        zd_decorated_comments = process_events(ticket_events)
        zd_decorated_comments += still_fresh_zd_triples
        support_emails_response = get_support_emails()
        support_decorated_comments = parse_emails_response(
            support_emails_response)
        results = diff.match_msgs(zd_decorated_comments,
                                  support_decorated_comments)
        logger.info("zd_matched: {}/{}".format(len(results[0]),
                                               len(zd_decorated_comments)))
        logger.info("archive_matched: {}/{}".format(
            len(results[2]), len(support_decorated_comments)))
        cleanup(*results, zd_still_fresh_filename, start_time)
        logger.info('END CYCLE - sleeping for {} seconds'.format(
            DIFFBOT_LOOP_WAIT_PERIOD))
        time.sleep(DIFFBOT_LOOP_WAIT_PERIOD)
def run():
    """Mailbot loop body: fetch new mail and apply each message as a ticket
    update (or send a rejection when the message can't be tied to a ticket).
    """
    # (removed commented-out pickle-based prototype code)
    msgs_data = mail.get_raw_mail()
    # iterate items() instead of keys() + repeated indexing
    for msg_id, msg_datum in msgs_data.items():
        logger.info("parsing message")
        ticket, body, sender, reply_all = parse_msg_data(msg_datum)
        if ticket is None:
            # couldn't associate the mail with a ticket: bounce it
            logger.info("sending rejection message")
            send_rejection(sender[1])
            continue
        logger.info("updating ticket")
        try:
            update_ticket(ticket, body, sender, reply_all)
        except TicketUpdateException as e:
            logger.error(e)
            continue
        logger.info(
            "ticket updated – ID: {}, Body: {}, Sender: {}, Public: {}".format(
                ticket, body.partition('\n')[0][:100], sender, reply_all))
def comment_match(c1, c2):
    """Fuzzy-compare two comment bodies.

    :return: True when the normalized bodies are identical or their
        similarity ratio exceeds COMMENT_MATCH_THRESHOLD
    """
    # strip everything except word characters (not just whitespace, as the
    # old comment claimed) so punctuation/layout differences don't count
    c1 = re.sub(r'[^\w]', '', c1)
    c2 = re.sub(r'[^\w]', '', c2)
    # zendesk seems to truncate comment size. This is a good-enough solution:
    # truncate both to the common length
    c1 = c1[:len(c2)]
    c2 = c2[:len(c1)]
    # could be an easy out
    if c1 == c2:
        logger.info('COMPLETE MATCH')
        return True
    # preliminary check: cheap upper bound on similarity
    matcher = difflib.SequenceMatcher(isjunk=lambda c: c in ' \n\r\t')
    matcher.set_seqs(c1, c2)
    qr = matcher.quick_ratio()
    if qr < COMMENT_MATCH_THRESHOLD:
        logger.debug(
            'quick ratio: {} - "{}..." and "{}..." don\'t match'.format(
                qr, c1[:30], c2[:30]))
        return False
    # full check via diff-match-patch levenshtein distance
    dmp = dmp_module.diff_match_patch()
    diff = dmp.diff_main(c1, c2)
    d = dmp.diff_levenshtein(diff)
    ratio = 1 - d / max(len(c1), len(c2))
    verdict = ratio > COMMENT_MATCH_THRESHOLD
    if verdict:
        logger.info('full ratio: {} - "{}..." and "{}..." FULL MATCH'.format(
            round(ratio, 4), c1[:30], c2[:30]))
        logger.debug('DIFF:\n{}'.format(diff))
    elif ratio > COMMENT_MATCH_THRESHOLD * 0.2:
        logger.debug(
            'not close to match with full ratio {}: \n\nDIFF:\n{}'.format(
                round(ratio, 4), diff))
    else:
        logger.debug('full ratio: {} - "{}..." and "{}..." no match'.format(
            round(ratio, 4), c1[:30], c2[:30]))
    return verdict
def text_match(zd_msg, archive_msg, threshold=0.90):
    """Fuzzy-compare the text bodies of a zendesk and an archive message.

    :param zd_msg: IMAP fetch dict with b'ENVELOPE' and b'BODY[TEXT]' entries
    :param archive_msg: same shape as *zd_msg*
    :param threshold: similarity ratio required for a match
    :return: True when the bodies are identical or similar beyond *threshold*
    """
    zd_subject = zd_msg[b'ENVELOPE'].subject
    if zd_subject is None:
        logger.warning('found message from zendesk with no subject')
        zd_subject = ''
    else:
        zd_subject = zd_subject.decode()
    archive_subject = archive_msg[b'ENVELOPE'].subject
    if archive_subject is None:
        logger.warning('found message from archive with no subject')
        archive_subject = ''
    else:
        archive_subject = archive_subject.decode()
    try:
        zd_text = zd_msg[b'BODY[TEXT]'].decode()
        archive_text = archive_msg[b'BODY[TEXT]'].decode()
    except Exception:
        # was a bare 'except:', which also swallowed SystemExit and
        # KeyboardInterrupt; a missing key or decode failure lands here
        logger.error('found msg with no body. subject: "{}" or "{}"'.format(
            zd_subject, archive_subject))
        return False
    # Cut the shit
    if zd_text == archive_text:
        logger.info('COMPLETE MATCH')
        return True
    # Preliminary check: cheap upper bound on similarity
    matcher = difflib.SequenceMatcher(isjunk=lambda c: c in ' \n\r\t')
    matcher.set_seqs(zd_text, archive_text)
    qr = matcher.quick_ratio()
    if qr < threshold:
        logger.debug('quick ratio: {} - "{}" and "{}" don\'t match'.format(
            qr, zd_subject, archive_subject))
        return False
    # Full check via diff-match-patch levenshtein distance
    dmp = dmp_module.diff_match_patch()
    dmp.Diff_Timeout = 0.2
    diff = dmp.diff_main(zd_text, archive_text)
    d = dmp.diff_levenshtein(diff)
    ratio = 1 - d / max(len(zd_text), len(archive_text))
    verdict = ratio > threshold
    if verdict:
        logger.info('full ratio: {} - "{}" and "{}" FULL MATCH'.format(
            round(ratio, 4), zd_subject, archive_subject))
    else:
        logger.debug('full ratio: {} - "{}" and "{}" no match'.format(
            round(ratio, 4), zd_subject, archive_subject))
    return verdict
def process_events(ticket_events):
    """Flatten Zendesk ticket events into comment triples.

    :param ticket_events: iterable of incremental-export ticket events
    :return: list of (timestamp, comment body, ticket_id) triples
    """
    zendesk_comments = []  # [(timestamp, comment, ticket_id)...]
    for event in ticket_events:
        comment_children = 0  # TODO ditch this variable
        for child in event['child_events']:
            if child['event_type'].lower() != 'comment':
                continue
            comment_children += 1
            # a single event is expected to carry at most one comment child
            if comment_children > 1:
                logger.error(
                    'found {} comment children in single event'.format(
                        comment_children))
            zendesk_comments.append(
                (event['timestamp'], child['body'], event['ticket_id']))
    logger.info('found {} zendesk comments'.format(len(zendesk_comments)))
    return zendesk_comments
def get_support_emails():
    """Fetch raw BODY[] data for every email in the from-archive folder.

    :return: list of raw IMAP fetch response items; [] when the folder is
        empty. Exits the process when the server refuses search/fetch.
    """
    logger.info('getting archive support emails')
    imap4 = imaplib.IMAP4_SSL(host=IMAP_SERVER, port=IMAP_PORT)
    imap4.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
    imap4.select(FROM_ARCHIVE_ACCOUNTS_FOLDER)
    status, response = imap4.uid('search', None, 'ALL')
    if status != 'OK':
        logger.error('unable to search email server')
        exit(1)
    # response of the form: [b'1 2 3 4']
    if response[0] == b'':
        logger.info('no support emails to match against')
        return []
    msg_ids = response[0].decode().split(' ')
    logger.info('found {} support emails'.format(len(msg_ids)))
    responses = []
    # fetch in chunks of 1000 so a huge folder doesn't blow a single command
    for chunk in chunked(msg_ids, 1000):
        logger.debug('getting a message chunk')
        status, chunk_data = imap4.uid('fetch', ','.join(chunk), '(BODY[])')
        if status != 'OK':
            logger.error('unable to fetch from email server')
            exit(1)
        responses.extend(chunk_data)
    imap4.close()
    imap4.logout()
    return responses
def cleanup(zd_matched, zd_unmatched, archive_matched, archive_unmatched,
            zd_still_fresh_filename, start_time):
    """Post-diff housekeeping.

    - moves matched archive emails to MATCHED_ARCHIVE_FOLDER
    - persists still-fresh unmatched Zendesk triples for the next cycle
    - appends old unmatched Zendesk triples to zd_unmatched.log
    - moves old unmatched archive emails to UNMATCHED_ARCHIVE_FOLDER
    - expunges deleted originals and logs out

    :param zd_matched: matched zendesk triples (unused here; kept so the
        signature matches cleanup(*diff_results, ...))
    :param zd_unmatched: unmatched (timestamp, comment, ticket_id) triples
    :param archive_matched: matched (timestamp, comment, msg_id) triples
    :param archive_unmatched: unmatched archive triples
    :param zd_still_fresh_filename: pickle path for still-fresh zd triples
    :param start_time: cursor persisted alongside the still-fresh triples
    """
    imap4 = imaplib.IMAP4_SSL(host=IMAP_SERVER, port=IMAP_PORT)
    imap4.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
    imap4.select(FROM_ARCHIVE_ACCOUNTS_FOLDER)
    # move matched emails
    archive_matched_ids = [str(msg_id) for _, _, msg_id in archive_matched]
    logger.info('moving {} matched emails'.format(len(archive_matched_ids)))
    for msg_ids_chunk in chunked(archive_matched_ids, 1000):
        uids = ','.join(msg_ids_chunk)
        result, err = imap4.uid('COPY', uids, MATCHED_ARCHIVE_FOLDER)
        if result != 'OK':
            logger.error('unable to copy items')
        # raw string: '\D' in a plain literal is an invalid escape sequence
        result, delete = imap4.uid('STORE', uids, '+FLAGS', r'(\Deleted)')
        if result != 'OK':
            logger.error('unable to delete original versions')
    # deal with unmatched updates from Zendesk
    cutoff = datetime.datetime.now().timestamp() - MINUTES_GRACE_PERIOD * 60
    still_fresh = []
    old = []
    for triple in zd_unmatched:
        if triple[0] > cutoff:
            still_fresh.append(triple)
        else:
            old.append(triple)
    # save still fresh for later
    logger.info('saving {} zd ticket messages for the next round'.format(
        len(still_fresh)))
    # context manager so the pickle file handle is closed deterministically
    # (the original left the handle from open() dangling)
    with open(zd_still_fresh_filename, 'wb') as pickle_file:
        pickle.dump((start_time, still_fresh), pickle_file)
    # log old unmatched ticket comments
    logger.info('logging {} old zd ticket messages that went unmatched'.format(
        len(old)))
    with open('zd_unmatched.log', 'a') as f:
        for t, c, t_id in old:
            f.write("""
Ticket #{}
Time: {}
Comment: {}
""".format(t_id, str(datetime.datetime.fromtimestamp(t)), c))
    # move old emails to unmatched (reuse the cutoff computed above instead
    # of recomputing the identical expression)
    old_archive_unmatched_ids = [
        str(msg_id) for t, _, msg_id in archive_unmatched if t < cutoff
    ]
    logger.info('moving {} old unmatched archive emails'.format(
        len(old_archive_unmatched_ids)))
    for msg_ids_chunk in chunked(old_archive_unmatched_ids, 1000):
        uids = ','.join(msg_ids_chunk)
        result, err = imap4.uid('COPY', uids, UNMATCHED_ARCHIVE_FOLDER)
        if result != 'OK':
            logger.error('unable to copy items')
        result, delete = imap4.uid('STORE', uids, '+FLAGS', r'(\Deleted)')
        if result != 'OK':
            logger.error('unable to delete original versions')
    # we're done here
    imap4.expunge()
    imap4.close()
    imap4.logout()
def matching(needles, haystack, sort_key, within_error, is_match):
    """Match needle triples against haystack triples with a sorted sweep.

    :param needles: list of (timestamp, comment, id) triples to be matched
    :param haystack: list of (timestamp, comment, id) candidate triples
    :param sort_key: function for sorting items
    :param within_error: func whether items are within error margin w/respect
        to the sort attribute
    :param is_match: func whether items are a match
    :return: four tuple of sets: matched needles, unmatched needles,
        matched hay, unmatched hay

    NOTE: both input lists are sorted in place.
    """
    needles.sort(key=sort_key)
    haystack.sort(key=sort_key)
    j0 = 0  # the 'lower bound' index in haystack for matching
    matched_needles = set()
    unmatched_needles = set()
    matched_hay = set()
    unmatched_hay = set()
    for i, triple in enumerate(needles):
        logger.info('Searching for match for item {}/{}'.format(
            i, len(needles)))
        t, c, n = triple
        # move the lower bound up
        while j0 < len(haystack):
            # once we are within error, stop increasing
            if within_error(t, haystack[j0][0]):
                break
            # if we get beyond our needle, stop increasing
            if sort_key(triple) < sort_key(haystack[j0]):
                break
            # any unmatched hay at j0 at this point is unmatched
            if haystack[j0] not in matched_hay:
                unmatched_hay.add(haystack[j0])
            j0 += 1
        j = j0
        match_found = False
        while True:
            # don't run off the end of haystack
            if j == len(haystack):
                if not match_found:
                    unmatched_needles.add(triple)
                break
            # skip hay already matched
            elif haystack[j] in matched_hay:
                pass
            # if hay is beyond error, then no match for needle
            elif not within_error(t, haystack[j][0]):
                if not match_found:
                    unmatched_needles.add(triple)
                break
            # we got a match
            elif is_match(c, haystack[j][1]):
                matched_needles.add(triple)
                matched_hay.add(haystack[j])
                # deliberately keep scanning instead of breaking: duplicate
                # emails may need to be matched against the same needle
                match_found = True
            # try the next hay
            j += 1
    return matched_needles, unmatched_needles, matched_hay, unmatched_hay