def get_support_emails():
    """Fetch every message in the archive-accounts folder over IMAP.

    Logs in with the DIFFBOT credentials from ``env``, selects
    ``FROM_ARCHIVE_ACCOUNTS_FOLDER``, searches for all UIDs and fetches the
    full body (``BODY[]``) of each message in chunks of 1000 UIDs.

    Returns:
        A list holding the concatenated raw ``imaplib`` fetch responses of
        all chunks, or an empty list when the folder contains no messages.

    Note:
        Terminates the process via ``exit(1)`` when the server answers a
        search or fetch with a status other than ``'OK'``.
    """
    logger.info('getting archive support emails')
    imap4 = imaplib.IMAP4_SSL(host=IMAP_SERVER, port=IMAP_PORT)
    try:
        imap4.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
        imap4.select(FROM_ARCHIVE_ACCOUNTS_FOLDER)

        status, response = imap4.uid('search', None, 'ALL')
        if status != 'OK':
            logger.error('unable to search email server')
            exit(1)

        # response of the form: [b'1 2 3 4']
        if response[0] == b'':
            logger.info('no support emails to match against')
            return []

        # split() without an argument tolerates repeated/trailing whitespace
        # and never yields empty UID strings.
        msg_ids = response[0].decode().split()
        logger.info('found {} support emails'.format(len(msg_ids)))

        responses = []
        for msg_ids_chunk in chunked(msg_ids, 1000):
            logger.debug('getting a message chunk')
            status, response = imap4.uid(
                'fetch', ','.join(msg_ids_chunk), '(BODY[])')
            if status != 'OK':
                logger.error('unable to fetch from email server')
                exit(1)
            responses.extend(response)

        imap4.close()
        return responses
    finally:
        # Runs on every exit path (early return, SystemExit from exit(1),
        # or an exception) so the connection is never left open.
        imap4.logout()
def get_msgs(folders):
    """Fetch body text, envelope and internal date of all mail in ``folders``.

    Opens one IMAP connection with the DIFFBOT credentials from ``env`` and,
    for every folder, searches for ``ALL`` messages and fetches
    ``BODY[TEXT]``, ``ENVELOPE`` and ``INTERNALDATE`` in chunks of 1000 ids.

    Args:
        folders: Iterable of folder names to read.

    Returns:
        A dict mapping each folder name to the merged results of
        ``server.fetch`` for that folder.

    Raises:
        Exception: Any error raised while connecting, logging in or
            fetching is logged at critical level and re-raised.
    """
    try:
        logger.debug('establishing connection to {}:{}'.format(
            IMAP_SERVER, IMAP_PORT))
        server = imapclient.IMAPClient(host=IMAP_SERVER, port=IMAP_PORT)
        try:
            logger.debug('logging in as ' + env['DIFFBOT_ADDRESS'])
            server.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])

            data = {}
            for folder in folders:
                logger.debug('selecting {}'.format(folder))
                server.select_folder(folder)

                logger.debug('polling for mail')
                msg_ids = server.search('ALL')
                logger.info("{} emails found".format(len(msg_ids)))

                logger.debug('fetching msg data')
                msg_data = {}
                for msg_ids_chunk in chunked(msg_ids, 1000):
                    logger.info('getting msg data for ids [{}... {}]'.format(
                        msg_ids_chunk[0], msg_ids_chunk[-1]))
                    msg_data.update(server.fetch(
                        msg_ids_chunk,
                        ['BODY[TEXT]', 'ENVELOPE', 'INTERNALDATE']))
                data[folder] = msg_data
            return data
        finally:
            # Log out on failure as well as on success so a failed
            # login/search/fetch does not leak the connection.
            server.logout()
    except Exception as e:
        logger.critical(e)
        # Bare raise keeps the original traceback intact.
        raise
def hotswap_zd_msgs(zendesk_msgs):
    """Replace each message's ``BODY[TEXT]`` with the raw email from Zendesk.

    The ticket number is parsed from every message subject (expected to
    contain ``ZD<digits>:``). The first audit id and then the raw email of
    each ticket are looked up through ``apiservice``, and the result is
    written back into ``zendesk_msgs`` in place.

    Args:
        zendesk_msgs: Dict keyed by message id; each value must expose
            ``[b'ENVELOPE'].subject`` as bytes and accept assignment to
            ``[b'BODY[TEXT]']``.

    Returns:
        None. Messages whose subject cannot be parsed, or whose raw email
        could not be retrieved, get an empty ``b''`` body.
    """
    started_at = time.monotonic()

    # Raw bytes literal: '\d' in a non-raw literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    pattern = re.compile(rb'.*ZD(\d+):.*')

    msg_ids = list(zendesk_msgs)
    ticket_ids = []
    for msg_id in msg_ids:
        try:
            bin_subject = zendesk_msgs[msg_id][b'ENVELOPE'].subject
            subject_match = pattern.match(bin_subject)
            if subject_match is None:
                raise Exception('invalid subject line "{}"'.format(
                    bin_subject.decode()))
            ticket_ids.append(int(subject_match.group(1).decode()))
        except Exception as e:
            # Keep ticket_ids aligned with msg_ids: a placeholder stands in
            # for every subject that is missing, undecodable or unmatched.
            ticket_ids.append(None)
            logger.error('bad subject line {}'.format(e))

    first_audit_ids = apiservice.concurrent_get_first_comments(ticket_ids)
    raw_emails = apiservice.concurrent_get_raw_emails(ticket_ids,
                                                      first_audit_ids)

    for msg_id, raw in zip(msg_ids, raw_emails):
        zendesk_msgs[msg_id][b'BODY[TEXT]'] = raw if raw is not None else b''

    logger.debug('completed hotswap in {} seconds'.format(
        round(time.monotonic() - started_at, 2)))
def og_email_datetime_match(
        msg1,
        msg2,
        margin=datetime.timedelta(minutes=MINUTES_TIME_MATCH_ERROR)):
    """Tell whether two messages' INTERNALDATE values lie within ``margin``.

    Args:
        msg1: Mapping with a ``b'INTERNALDATE'`` entry.
        msg2: Mapping with a ``b'INTERNALDATE'`` entry.
        margin: Maximum allowed distance between the two dates (exclusive).

    Returns:
        True when the absolute difference of the two dates is strictly
        smaller than ``margin``, otherwise False.
    """
    first_date = msg1[b'INTERNALDATE']
    second_date = msg2[b'INTERNALDATE']

    # The distance check is symmetric, so one absolute difference suffices.
    within_margin = abs(first_date - second_date) < margin

    qualifier = '' if within_margin else 'not '
    logger.debug('comparing {} and {}, {}within {} margin'.format(
        first_date, second_date, qualifier, margin))
    return within_margin
def send_mail(sender, receiver, subject, body, html_body=None, cc=None,
              config=get_default_smtp_config()):
    """Send a plain-text (optionally also HTML) email over SMTP-over-SSL.

    Args:
        sender: Address placed in the ``From`` header.
        receiver: Address placed in the ``To`` header (a string; an
            iterable of addresses is also accepted for delivery).
        subject: Subject line.
        body: Plain-text body.
        html_body: Optional HTML alternative of the body.
        cc: Optional list of addresses to copy; ignored unless it is a list.
        config: Dict with ``host``, ``port``, ``user`` and ``pass`` keys.
            NOTE: the default is evaluated once, when this function is
            defined, not on every call.

    Raises:
        Exception: Any connection, login or delivery error is logged at
            critical level and re-raised.
    """
    host = config['host']
    port = config['port']
    user = config['user']
    password = config['pass']
    logger.info('sending email from {} to {} over {}:{}'.format(
        sender, receiver, host, port))

    logger.debug('composing message headers')
    msg = MIMEMultipart('alternative')
    msg['From'] = sender
    msg['To'] = receiver

    # Envelope recipients: header fields alone do not route mail, so every
    # Cc address must also be handed to sendmail().
    recipients = [receiver] if isinstance(receiver, str) else list(receiver)
    if isinstance(cc, list):
        msg['Cc'] = ', '.join(cc)
        recipients.extend(cc)
    msg['Subject'] = subject

    # RFC 2046: parts of multipart/alternative are ordered by increasing
    # preference, so the plain text goes first and the HTML version last.
    msg.attach(MIMEText(body, 'plain'))
    if html_body is not None:
        msg.attach(MIMEText(html_body, 'html'))

    try:
        logger.debug('establishing connection to {}:{}'.format(host, port))
        # The context manager issues QUIT and closes the socket even when
        # login or delivery fails.
        with smtplib.SMTP_SSL(host=host, port=port) as server:
            logger.debug('logging in as ' + user)
            server.login(user, password)
            logger.debug('sending')
            # The authenticated user is kept as the envelope sender.
            server.sendmail(user, recipients, msg.as_string())
    except Exception as e:
        logger.critical(e)
        raise
def text_match(zd_msg, archive_msg, threshold=0.90):
    """Decide whether a Zendesk message and an archive message share a body.

    The comparison runs in three stages of increasing cost: exact equality,
    ``difflib`` quick ratio, and finally a diff-match-patch Levenshtein
    ratio.

    Args:
        zd_msg: Message mapping with ``b'ENVELOPE'`` and ``b'BODY[TEXT]'``.
        archive_msg: Message mapping with the same two entries.
        threshold: Similarity the bodies must exceed to count as a match.

    Returns:
        True when the bodies are identical or their similarity ratio is
        greater than ``threshold``; False otherwise, including when either
        body is missing or cannot be decoded.
    """
    zd_subject = zd_msg[b'ENVELOPE'].subject
    if zd_subject is None:
        logger.warning('found message from zendesk with no subject')
    zd_subject = '' if zd_subject is None else zd_subject.decode()

    archive_subject = archive_msg[b'ENVELOPE'].subject
    if archive_subject is None:
        logger.warning('found message from archive with no subject')
    archive_subject = ('' if archive_subject is None
                       else archive_subject.decode())

    try:
        zd_text = zd_msg[b'BODY[TEXT]'].decode()
        archive_text = archive_msg[b'BODY[TEXT]'].decode()
    except Exception:
        # Missing key, None body or undecodable bytes. A bare ``except``
        # here would also swallow KeyboardInterrupt and SystemExit.
        logger.error('found msg with no body. subject: "{}" or "{}"'.format(
            zd_subject, archive_subject))
        return False

    # Cheapest check first: identical bodies need no diffing. This also
    # guarantees the longer body is non-empty in the ratio below.
    if zd_text == archive_text:
        logger.info('COMPLETE MATCH')
        return True

    # Preliminary check: quick_ratio() is an upper bound on the real
    # similarity, so anything below the threshold can be rejected early.
    matcher = difflib.SequenceMatcher(isjunk=lambda c: c in ' \n\r\t')
    matcher.set_seqs(zd_text, archive_text)
    qr = matcher.quick_ratio()
    if qr < threshold:
        logger.debug('quick ratio: {} - "{}" and "{}" don\'t match'.format(
            qr, zd_subject, archive_subject))
        return False

    # Full check: edit distance normalised by the longer body.
    dmp = dmp_module.diff_match_patch()
    dmp.Diff_Timeout = 0.2
    diff = dmp.diff_main(zd_text, archive_text)
    d = dmp.diff_levenshtein(diff)
    ratio = 1 - d / max(len(zd_text), len(archive_text))
    verdict = ratio > threshold
    if verdict:
        logger.info('full ratio: {} - "{}" and "{}" FULL MATCH'.format(
            round(ratio, 4), zd_subject, archive_subject))
    else:
        logger.debug('full ratio: {} - "{}" and "{}" no match'.format(
            round(ratio, 4), zd_subject, archive_subject))
    return verdict
def concurrent_get_raw_emails(ticket_ids, first_audit_ids):
    """Download the raw first email of each ticket, with headers removed.

    Requests are issued through the session returned by
    ``get_logged_in_future_sesh()``, pausing ``60 / ZENDESK_API_RATE_LIMIT``
    seconds after each one, and are collected afterwards.

    Args:
        ticket_ids: Ticket ids; ``None`` entries are skipped.
        first_audit_ids: Audit ids aligned index-by-index with
            ``ticket_ids``; ``None`` entries are skipped.

    Returns:
        A list aligned with the inputs. Each entry is the encoded email
        text following the first blank line, or ``None`` when the ticket
        was skipped, the response status was not 200, or stripping the
        headers failed.
    """
    assert (len(ticket_ids) == len(first_audit_ids)
            ), 'unmatched ticket and first audit ids'
    url_template = 'https://archivesupport.zendesk.com/audits/{}/email.eml?ticket_id={}'
    session = get_logged_in_future_sesh()

    # Phase 1: fire off one request per usable (ticket, audit) pair.
    total = len(ticket_ids)
    pending = []
    for position, (ticket_id, audit_id) in enumerate(
            zip(ticket_ids, first_audit_ids)):
        logger.debug('getting raw email future for ticket #{} {}/{}'.format(
            ticket_id, position, total))
        if ticket_id is None or audit_id is None:
            pending.append(None)
        else:
            pending.append(
                session.get(url_template.format(audit_id, ticket_id)))
            time.sleep(60 / ZENDESK_API_RATE_LIMIT)

    # Phase 2: resolve the futures in order, keeping the list aligned.
    raw_emails = []
    for position, future in enumerate(pending):
        if future is None:
            raw_emails.append(None)
            continue

        response = future.result()
        if response.status_code != 200:
            logger.error('bad status code {}: {}'.format(
                response.status_code, response.content))
            raw_emails.append(None)
            continue

        try:
            reader = io.StringIO(response.content.decode())
            # Consume header lines up to and including the first blank one.
            while reader.readline().strip():
                pass
            stripped = reader.read().encode()
        except Exception as e:
            logger.error('{}#{} problem while stripping headers: {}'.format(
                first_audit_ids[position], ticket_ids[position], e))
            stripped = None
        raw_emails.append(stripped)

    return raw_emails
def concurrent_get_first_comments(ticket_ids):
    """Look up the id of the first audit of every ticket.

    Tickets are processed in chunks of 1000. Within a chunk, all audit
    requests are issued first (pausing ``60 / ZENDESK_API_RATE_LIMIT``
    seconds after each) and their results are collected afterwards.

    Args:
        ticket_ids: Ticket ids; ``None`` entries are skipped.

    Returns:
        A list aligned with ``ticket_ids`` holding the ``id`` of the first
        entry under ``audits`` in each response, or ``None`` when the
        ticket was skipped, the status was not 200, or parsing failed.
    """
    session = FuturesSession()
    url_template = 'https://archivesupport.zendesk.com/api/v2/tickets/{}/audits.json'

    audit_ids = []
    for ticket_ids_chunk in chunked(ticket_ids, 1000):
        chunk_size = len(ticket_ids_chunk)

        # Issue all requests of this chunk.
        audit_futures = []
        for position, ticket_id in enumerate(ticket_ids_chunk):
            if ticket_id is None:
                audit_futures.append(None)
                continue
            logger.debug('getting audit future for ticket {}/{}'.format(
                position, chunk_size))
            url = url_template.format(ticket_id)
            credentials = HTTPBasicAuth(
                env['ZENDESK_AGENT_ACCOUNT'] + "/token",
                env['ZENDESK_API_KEY'])
            audit_futures.append(session.get(url, auth=credentials))
            time.sleep(60 / ZENDESK_API_RATE_LIMIT)

        # Collect the results in request order.
        for position, future in enumerate(audit_futures):
            if future is None:
                audit_ids.append(None)
                continue

            response = future.result()
            if response.status_code != 200:
                logger.error('ticket #{} bad status code {}: {}'.format(
                    ticket_ids_chunk[position], response.status_code,
                    response.content))
                audit_ids.append(None)
                continue

            try:
                first_audit_id = response.json()['audits'][0]['id']
            except Exception as e:
                logger.error('while parsing result for #{} {}'.format(
                    ticket_ids_chunk[position], e))
                first_audit_id = None
            audit_ids.append(first_audit_id)

    return audit_ids
def comment_match(c1, c2):
    """Decide whether two comment strings are close enough to be the same.

    Both strings are reduced to their word characters, truncated to the
    length of the shorter one, and then compared in three stages: exact
    equality, ``difflib`` quick ratio, and a diff-match-patch Levenshtein
    ratio against ``COMMENT_MATCH_THRESHOLD``.

    Args:
        c1: First comment text.
        c2: Second comment text.

    Returns:
        True when the normalised comments are identical or their
        similarity ratio is greater than ``COMMENT_MATCH_THRESHOLD``.
    """
    # Drop every non-word character (whitespace AND punctuation) so that
    # formatting differences do not affect the comparison. Raw strings
    # avoid the invalid '\w' escape of a plain literal.
    c1 = re.sub(r'[^\w]', '', c1)
    c2 = re.sub(r'[^\w]', '', c2)

    # zendesk seems to truncate comment size. This is a good-enough solution
    # NOTE(review): after truncation both strings have equal length, so a
    # comment that is empty after normalisation compares equal to anything.
    c1 = c1[:len(c2)]
    c2 = c2[:len(c1)]

    # could be an easy out (also rules out zero length in the ratio below)
    if c1 == c2:
        logger.info('COMPLETE MATCH')
        return True

    # preliminary check: quick_ratio() is an upper bound on the similarity
    matcher = difflib.SequenceMatcher(isjunk=lambda c: c in ' \n\r\t')
    matcher.set_seqs(c1, c2)
    qr = matcher.quick_ratio()
    if qr < COMMENT_MATCH_THRESHOLD:
        logger.debug(
            'quick ratio: {} - "{}..." and "{}..." don\'t match'.format(
                qr, c1[:30], c2[:30]))
        return False

    # full check: edit distance normalised by the (common) length
    dmp = dmp_module.diff_match_patch()
    diff = dmp.diff_main(c1, c2)
    d = dmp.diff_levenshtein(diff)
    ratio = 1 - d / max(len(c1), len(c2))
    verdict = ratio > COMMENT_MATCH_THRESHOLD
    if verdict:
        logger.info('full ratio: {} - "{}..." and "{}..." FULL MATCH'.format(
            round(ratio, 4), c1[:30], c2[:30]))
        logger.debug('DIFF:\n{}'.format(diff))
    elif ratio > COMMENT_MATCH_THRESHOLD * 0.2:
        # Below the threshold but not negligible: dump the diff for triage.
        logger.debug(
            'not close to match with full ratio {}: \n\nDIFF:\n{}'.format(
                round(ratio, 4), diff))
    else:
        logger.debug('full ratio: {} - "{}..." and "{}..." no match'.format(
            round(ratio, 4), c1[:30], c2[:30]))
    return verdict
def change_folder(msg_ids, current_folder, new_folder):
    """Move messages from one IMAP folder to another.

    Each chunk of up to 1000 messages is copied to ``new_folder``, then
    deleted and expunged from ``current_folder``.

    Args:
        msg_ids: Ids of the messages to move; nothing happens (and no
            connection is opened) when this is empty.
        current_folder: Folder the messages currently live in.
        new_folder: Destination folder.
    """
    if not msg_ids:
        return

    logger.debug('establishing connection to {}:{}'.format(
        IMAP_SERVER, IMAP_PORT))
    server = imapclient.IMAPClient(host=IMAP_SERVER, port=IMAP_PORT)
    try:
        logger.debug('logging in as ' + env['DIFFBOT_ADDRESS'])
        server.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
        server.select_folder(current_folder)

        logger.debug('moving {} messages from {} to {}'.format(
            len(msg_ids), current_folder, new_folder))
        for msg_ids_chunk in chunked(msg_ids, 1000):
            # Copy first so a failure never removes a message that has not
            # reached the destination folder.
            server.copy(msg_ids_chunk, new_folder)
            server.delete_messages(msg_ids_chunk)
            server.expunge(msg_ids_chunk)
    finally:
        # Always release the connection, even when a step above raises.
        server.logout()
def get_raw_mail(unseen=True, read_only=False,
                 config=get_default_imap_config()):
    """Fetch full message data from the configured IMAP folder.

    Args:
        unseen: When True only ``UNSEEN`` messages are fetched, otherwise
            ``ALL`` messages.
        read_only: When True the folder is selected read-only, so fetching
            does not modify message flags on the server.
        config: Dict with ``host``, ``port``, ``user``, ``pass`` and
            ``folder`` keys. NOTE: the default is evaluated once, when this
            function is defined, not on every call.

    Returns:
        A dict with the merged results of ``server.fetch`` (``BODY[]`` and
        ``ENVELOPE``) for every message found.

    Raises:
        Exception: Any connection, login or fetch error is logged at
            critical level and re-raised.
    """
    host = config['host']
    port = config['port']
    user = config['user']
    password = config['pass']
    folder = config['folder']

    # "new" describes the UNSEEN search; the qualifier was previously
    # attached to the opposite case.
    qualifier = 'new ' if unseen else ''
    logger.info('getting {}mail from {}'.format(qualifier, user))
    try:
        logger.debug('establishing connection to {}:{}'.format(host, port))
        server = imapclient.IMAPClient(host=host, port=port)
        try:
            logger.debug('logging in as ' + user)
            server.login(user, password)

            logger.debug('selecting {}'.format(folder))
            # Honour read_only; it was accepted but never forwarded.
            server.select_folder(folder, readonly=read_only)

            logger.debug('polling for mail')
            msg_ids = server.search('UNSEEN' if unseen else 'ALL')
            logger.debug("{} {}emails found".format(len(msg_ids), qualifier))

            logger.debug('fetching msg data')
            msg_data = {}
            for msg_ids_chunk in chunked(msg_ids, 1000):
                msg_data.update(
                    server.fetch(msg_ids_chunk, ['BODY[]', 'ENVELOPE']))
            return msg_data
        finally:
            # Log out on failure as well as on success.
            server.logout()
    except Exception as e:
        logger.critical(e)
        raise
def time_match(t1, t2):
    """Tell whether two numeric timestamps lie within the allowed margin.

    The margin is ``MINUTES_TIME_MATCH_ERROR`` minutes multiplied by 60.

    Args:
        t1: First timestamp.
        t2: Second timestamp.

    Returns:
        True when the absolute difference between ``t1`` and ``t2`` is
        strictly smaller than the margin, otherwise False.
    """
    allowed_gap = MINUTES_TIME_MATCH_ERROR * 60

    # The check is symmetric, so one absolute difference covers both
    # directions.
    is_close = abs(t1 - t2) < allowed_gap

    qualifier = '' if is_close else 'not '
    logger.debug('comparing {} and {}, {}within {} margin'.format(
        t1, t2, qualifier, allowed_gap))
    return is_close