Example #1
0
def get_support_emails():
    """Fetch the raw contents of every message in the archive accounts folder.

    Logs in to the IMAP server with the diffbot credentials from ``env``,
    selects ``FROM_ARCHIVE_ACCOUNTS_FOLDER``, searches for all UIDs and
    fetches ``BODY[]`` for them in chunks of 1000.

    Returns:
        list: the concatenated ``imaplib`` fetch response items of every
        chunk, or an empty list when the search finds no messages.

    Note:
        A non-``OK`` status from the search or a fetch is logged and the
        process is terminated through ``exit(1)``. The connection is logged
        out on every exit path, including those.
    """
    logger.info('getting archive support emails')
    imap4 = imaplib.IMAP4_SSL(host=IMAP_SERVER, port=IMAP_PORT)
    try:
        imap4.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
        imap4.select(FROM_ARCHIVE_ACCOUNTS_FOLDER)

        status, response = imap4.uid('search', None, 'ALL')
        if status != 'OK':
            logger.error('unable to search email server')
            exit(1)
        # response of the form: [b'1 2 3 4']
        if response[0] == b'':
            logger.info('no support emails to match against')
            return []

        msg_ids = response[0].decode().split(' ')
        logger.info('found {} support emails'.format(len(msg_ids)))
        responses = []
        for msg_ids_chunk in chunked(msg_ids, 1000):
            logger.debug('getting a message chunk')
            status, response = imap4.uid('fetch', ','.join(msg_ids_chunk),
                                         '(BODY[])')
            if status != 'OK':
                logger.error('unable to fetch from email server')
                exit(1)
            responses.extend(response)

        # Leave the selected mailbox before ending the session, as before.
        imap4.close()
        return responses
    finally:
        # Runs on the early return, on exit(1) (SystemExit) and on errors,
        # so the server-side session is never left dangling.
        imap4.logout()
Example #2
0
def get_msgs(folders):
    """Fetch body text, envelope and internal date of all mail in *folders*.

    Args:
        folders: iterable of IMAP folder names to read.

    Returns:
        dict: maps each folder name to the merged ``server.fetch`` results
        for every message id found in that folder (fetched 1000 at a time).

    Raises:
        Exception: any error raised while connecting, logging in, searching
        or fetching is logged at critical level and re-raised.
    """
    try:
        logger.debug('establishing connection to {}:{}'.format(IMAP_SERVER, IMAP_PORT))
        server = imapclient.IMAPClient(host=IMAP_SERVER, port=IMAP_PORT)
        try:
            logger.debug('logging in as ' + env['DIFFBOT_ADDRESS'])
            server.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])

            data = {}
            for folder in folders:
                logger.debug('selecting {}'.format(folder))
                server.select_folder(folder)
                logger.debug('polling for mail')
                msg_ids = server.search('ALL')
                logger.info("{} emails found".format(len(msg_ids)))

                logger.debug('fetching msg data')
                msg_data = {}
                for msg_ids_chunk in chunked(msg_ids, 1000):
                    logger.info('getting msg data for ids [{}... {}]'.format(msg_ids_chunk[0], msg_ids_chunk[-1]))
                    msg_data.update(server.fetch(msg_ids_chunk, ['BODY[TEXT]', 'ENVELOPE', 'INTERNALDATE']))

                data[folder] = msg_data
        except Exception:
            # Best-effort cleanup so a failed run does not leak the
            # connection; a cleanup failure must not hide the real error.
            try:
                server.logout()
            except Exception as cleanup_error:
                logger.debug('logout during error cleanup failed: {}'.format(cleanup_error))
            raise

        server.logout()
        return data
    except Exception as e:
        logger.critical(e)
        raise
Example #3
0
def hotswap_zd_msgs(zendesk_msgs):
    """Replace each Zendesk message's ``BODY[TEXT]`` with the original email.

    For every message the ticket number is parsed from a ``ZD<digits>:``
    marker in the envelope subject. The ticket ids are then passed to
    ``apiservice`` to obtain the first audit ids and the raw emails, and
    each message's ``b'BODY[TEXT]'`` entry is overwritten in place with the
    raw email (``b''`` when none was obtained).

    Args:
        zendesk_msgs: mapping of message id to a message mapping that has a
            ``b'ENVELOPE'`` entry exposing a bytes ``subject``.

    Returns:
        None. ``zendesk_msgs`` is mutated in place.
    """
    started_at = time.monotonic()

    # Raw bytes literal: ``\d`` in a non-raw literal is an invalid escape.
    pattern = re.compile(rb'.*ZD(\d+):.*')
    msg_ids = list(zendesk_msgs)
    ticket_ids = []
    for msg_id in msg_ids:
        try:
            bin_subject = zendesk_msgs[msg_id][b'ENVELOPE'].subject
            subject_match = pattern.match(bin_subject)
            if subject_match is None:
                raise ValueError('invalid subject line "{}"'.format(
                    bin_subject.decode()))
            ticket_ids.append(int(subject_match.group(1).decode()))
        except Exception as e:
            # Keep a placeholder so ticket_ids stays aligned with msg_ids;
            # one unparsable subject must not abort the whole batch.
            ticket_ids.append(None)
            logger.error('bad subject line {}'.format(e))

    first_audit_ids = apiservice.concurrent_get_first_comments(ticket_ids)
    raw_emails = apiservice.concurrent_get_raw_emails(ticket_ids,
                                                      first_audit_ids)

    # NOTE(review): relies on apiservice returning one entry per ticket id,
    # in the same order - confirm against apiservice.
    for msg_id, raw in zip(msg_ids, raw_emails):
        zendesk_msgs[msg_id][b'BODY[TEXT]'] = raw if raw is not None else b''

    logger.debug('completed hotswap in {} seconds'.format(
        round(time.monotonic() - started_at, 2)))
Example #4
0
def og_email_datetime_match(
    msg1, msg2, margin=datetime.timedelta(minutes=MINUTES_TIME_MATCH_ERROR)):
    """Tell whether two messages' ``INTERNALDATE`` values are close together.

    Args:
        msg1: message mapping with a ``b'INTERNALDATE'`` entry.
        msg2: message mapping with a ``b'INTERNALDATE'`` entry.
        margin: maximum allowed distance between the two dates (exclusive).

    Returns:
        bool: True when the dates differ by strictly less than *margin* in
        either direction.
    """
    first_date = msg1[b'INTERNALDATE']
    second_date = msg2[b'INTERNALDATE']
    # A single absolute difference covers both orderings of the two dates.
    within_margin = abs(first_date - second_date) < margin
    qualifier = '' if within_margin else 'not '
    logger.debug('comparing {} and {}, {}within {} margin'.format(
        first_date, second_date, qualifier, margin))
    return within_margin
Example #5
0
def send_mail(sender, receiver, subject, body, html_body=None, cc=None, config=get_default_smtp_config()):
    """Send a plain-text (optionally also HTML) email over SMTP-over-SSL.

    Args:
        sender: value for the ``From`` header.
        receiver: value for the ``To`` header and envelope recipient.
        subject: value for the ``Subject`` header.
        body: plain-text body.
        html_body: optional HTML alternative of the body.
        cc: optional list of addresses; they are put in the ``Cc`` header
            and added to the envelope recipients. Non-list values are
            ignored.
        config: mapping with ``host``, ``port``, ``user`` and ``pass`` keys.
            ``user`` is used both for login and as the envelope sender.

    Raises:
        Exception: any error while connecting, logging in or sending is
        logged at critical level and re-raised.
    """
    host = config['host']
    port = config['port']
    user = config['user']
    password = config['pass']
    logger.info('sending email from {} to {} over {}:{}'.format(sender, receiver, host, port))
    logger.debug('composing message headers')

    msg = MIMEMultipart('alternative')
    msg['From'] = sender
    msg['To'] = receiver
    recipients = [receiver] if isinstance(receiver, str) else list(receiver)
    if isinstance(cc, list):
        msg['Cc'] = ', '.join(cc)
        # A Cc header alone delivers nothing; the addresses must also be
        # envelope recipients.
        recipients.extend(cc)
    msg['Subject'] = subject

    # multipart/alternative parts go from least to most preferred, so the
    # plain-text fallback comes first and the HTML version last.
    msg.attach(MIMEText(body, 'plain'))
    if html_body is not None:
        msg.attach(MIMEText(html_body, 'html'))

    try:
        logger.debug('establishing connection to {}:{}'.format(host, port))
        # The context manager closes the connection on success and on error.
        with smtplib.SMTP_SSL(host=host, port=port) as server:
            logger.debug('logging in as ' + user)
            server.login(user, password)
            logger.debug('sending')
            server.sendmail(user, recipients, msg.as_string())
    except Exception as e:
        logger.critical(e)
        raise
Example #6
0
def text_match(zd_msg, archive_msg, threshold=0.90):
    """Decide whether a Zendesk message and an archive message share a body.

    The ``b'BODY[TEXT]'`` entries are compared in three stages: exact
    equality, a cheap ``SequenceMatcher.quick_ratio`` pre-filter, and a
    Levenshtein-based similarity computed with diff-match-patch.

    Args:
        zd_msg: message mapping with ``b'ENVELOPE'`` and ``b'BODY[TEXT]'``.
        archive_msg: message mapping with the same two entries.
        threshold: similarity required for a match. The quick ratio must
            be at least this value and the full ratio strictly above it.

    Returns:
        bool: True when the bodies are considered the same text. False is
        also returned when either body cannot be read or decoded.
    """
    # Subjects are only used in log messages, so undecodable bytes are
    # replaced rather than allowed to abort the comparison.
    zd_subject = zd_msg[b'ENVELOPE'].subject
    if zd_subject is None:
        logger.warning('found message from zendesk with no subject')
    zd_subject = '' if zd_subject is None else zd_subject.decode(
        errors='replace')
    archive_subject = archive_msg[b'ENVELOPE'].subject
    if archive_subject is None:
        logger.warning('found message from archive with no subject')
    archive_subject = '' if archive_subject is None else archive_subject.decode(
        errors='replace')

    try:
        zd_text = zd_msg[b'BODY[TEXT]'].decode()
        archive_text = archive_msg[b'BODY[TEXT]'].decode()
    except Exception:
        # Missing, None or undecodable body. ``Exception`` (not a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        logger.error('found msg with no body. subject: "{}" or "{}"'.format(
            zd_subject, archive_subject))
        return False

    # Identical text needs no fuzzy comparison.
    if zd_text == archive_text:
        logger.info('COMPLETE MATCH')
        return True

    # Preliminary check
    matcher = difflib.SequenceMatcher(isjunk=lambda c: c in ' \n\r\t')
    matcher.set_seqs(zd_text, archive_text)
    qr = matcher.quick_ratio()
    if qr < threshold:
        logger.debug('quick ratio: {} - "{}" and "{}" don\'t match'.format(
            qr, zd_subject, archive_subject))
        return False

    # Full check
    dmp = dmp_module.diff_match_patch()
    dmp.Diff_Timeout = 0.2
    diff = dmp.diff_main(zd_text, archive_text)
    d = dmp.diff_levenshtein(diff)
    # The texts differ here, so at least one is non-empty and the
    # denominator cannot be zero.
    ratio = 1 - d / max(len(zd_text), len(archive_text))
    verdict = ratio > threshold
    if verdict:
        logger.info('full ratio: {} - "{}" and "{}" FULL MATCH'.format(
            round(ratio, 4), zd_subject, archive_subject))
    else:
        logger.debug('full ratio: {} - "{}" and "{}" no match'.format(
            round(ratio, 4), zd_subject, archive_subject))
    return verdict
Example #7
0
def concurrent_get_raw_emails(ticket_ids, first_audit_ids):
    """Download the raw first email of each ticket and strip its headers.

    Requests are issued through the session returned by
    ``get_logged_in_future_sesh()``, pausing ``60 / ZENDESK_API_RATE_LIMIT``
    seconds after each one, and the results are collected afterwards.

    Args:
        ticket_ids: ticket ids; ``None`` entries are skipped.
        first_audit_ids: audit ids, one per ticket; ``None`` entries are
            skipped.

    Returns:
        list: one entry per input pair, in order. Each is the encoded
        message text following the first blank line of the download, or
        ``None`` when the pair was skipped, the status code was not 200 or
        the content could not be processed.
    """
    assert (len(ticket_ids) == len(first_audit_ids)
            ), 'unmatched ticket and first audit ids'
    url_template = 'https://archivesupport.zendesk.com/audits/{}/email.eml?ticket_id={}'
    session = get_logged_in_future_sesh()

    # Phase 1: queue one request (or a None placeholder) per ticket.
    pending = []
    for position, (ticket_id, audit_id) in enumerate(
            zip(ticket_ids, first_audit_ids)):
        logger.debug('getting raw email future for ticket #{} {}/{}'.format(
            ticket_id, position, len(ticket_ids)))
        if ticket_id is None or audit_id is None:
            pending.append(None)
            continue
        pending.append(session.get(url_template.format(audit_id, ticket_id)))
        time.sleep(60 / ZENDESK_API_RATE_LIMIT)

    # Phase 2: resolve the futures in the same order.
    raw_emails = []
    for position, future in enumerate(pending):
        if future is None:
            raw_emails.append(None)
            continue

        response = future.result()
        if response.status_code != 200:
            logger.error('bad status code {}: {}'.format(
                response.status_code, response.content))
            raw_emails.append(None)
            continue

        try:
            buf = io.StringIO(response.content.decode())
            # Consume lines up to and including the first blank one (or
            # until the end of the text); what remains is the body.
            line = buf.readline()
            while line.strip():
                line = buf.readline()
            raw_emails.append(buf.read().encode())
        except Exception as e:
            logger.error('{}#{} problem while stripping headers: {}'.format(
                first_audit_ids[position], ticket_ids[position], e))
            raw_emails.append(None)

    return raw_emails
Example #8
0
def concurrent_get_first_comments(ticket_ids):
    """Look up the id of the first audit of each ticket.

    Tickets are processed in chunks of 1000. Within a chunk all requests
    are queued on a ``FuturesSession`` (pausing
    ``60 / ZENDESK_API_RATE_LIMIT`` seconds after each) before any result
    is read.

    Args:
        ticket_ids: ticket ids; ``None`` entries are skipped.

    Returns:
        list: one entry per ticket id, in order: the ``id`` of the first
        element of the response's ``audits`` list, or ``None`` when the
        ticket was skipped, the status code was not 200 or the response
        could not be parsed.
    """
    session = FuturesSession()

    url_template = 'https://archivesupport.zendesk.com/api/v2/tickets/{}/audits.json'
    audit_ids = []

    for batch in chunked(ticket_ids, 1000):
        # Queue every request of the batch first ...
        futures = []
        for position, ticket_id in enumerate(batch):
            if ticket_id is not None:
                logger.debug('getting audit future for ticket {}/{}'.format(
                    position, len(batch)))
                url = url_template.format(ticket_id)
                credentials = HTTPBasicAuth(
                    env['ZENDESK_AGENT_ACCOUNT'] + "/token",
                    env['ZENDESK_API_KEY'])
                futures.append(session.get(url, auth=credentials))
                time.sleep(60 / ZENDESK_API_RATE_LIMIT)
            else:
                futures.append(None)

        # ... then collect the results in the same order.
        for ticket_id, future in zip(batch, futures):
            if future is None:
                audit_ids.append(None)
                continue

            response = future.result()
            if response.status_code != 200:
                logger.error('ticket #{} bad status code {}: {}'.format(
                    ticket_id, response.status_code, response.content))
                audit_ids.append(None)
                continue

            try:
                first_audit_id = response.json()['audits'][0]['id']
            except Exception as e:
                logger.error('while parsing result for #{} {}'.format(
                    ticket_id, e))
                first_audit_id = None
            audit_ids.append(first_audit_id)

    return audit_ids
Example #9
0
def comment_match(c1, c2):
    """Decide whether two comment texts are (nearly) the same.

    Both texts are reduced to their word characters and cut to the length
    of the shorter one. They are then compared by equality, by a cheap
    ``SequenceMatcher.quick_ratio`` pre-filter and finally by a
    Levenshtein-based similarity computed with diff-match-patch.

    Args:
        c1: first comment text (str).
        c2: second comment text (str).

    Returns:
        bool: True when the normalised texts are equal or their similarity
        is strictly above ``COMMENT_MATCH_THRESHOLD``. Because of the
        truncation, a text with no word characters equals any other text.
    """
    # Drop every non-word character (whitespace and punctuation alike).
    c1 = re.sub(r'\W', '', c1)
    c2 = re.sub(r'\W', '', c2)

    # zendesk seems to truncate comment size. This is a good-enough solution
    c1 = c1[:len(c2)]
    c2 = c2[:len(c1)]

    # could be an easy out
    if c1 == c2:
        logger.info('COMPLETE MATCH')
        return True

    # preliminary check
    matcher = difflib.SequenceMatcher(isjunk=lambda c: c in ' \n\r\t')
    matcher.set_seqs(c1, c2)
    qr = matcher.quick_ratio()
    if qr < COMMENT_MATCH_THRESHOLD:
        logger.debug(
            'quick ratio: {} - "{}..." and "{}..." don\'t match'.format(
                qr, c1[:30], c2[:30]))
        return False

    # full check
    dmp = dmp_module.diff_match_patch()
    diff = dmp.diff_main(c1, c2)
    d = dmp.diff_levenshtein(diff)
    # The texts differ here, so they are non-empty and the denominator
    # cannot be zero.
    ratio = 1 - d / max(len(c1), len(c2))
    verdict = ratio > COMMENT_MATCH_THRESHOLD
    if verdict:
        logger.info('full ratio: {} - "{}..." and "{}..." FULL MATCH'.format(
            round(ratio, 4), c1[:30], c2[:30]))
        logger.debug('DIFF:\n{}'.format(diff))
    elif ratio > COMMENT_MATCH_THRESHOLD * 0.2:
        logger.debug(
            'not close to match with full ratio {}: \n\nDIFF:\n{}'.format(
                round(ratio, 4), diff))
    else:
        logger.debug('full ratio: {} - "{}..." and "{}..." no match'.format(
            round(ratio, 4), c1[:30], c2[:30]))
    return verdict
Example #10
0
def change_folder(msg_ids, current_folder, new_folder):
    """Move messages from one IMAP folder to another.

    Each chunk of up to 1000 ids is copied to *new_folder*, then deleted
    and expunged from *current_folder*. Does nothing when *msg_ids* is
    empty.

    Args:
        msg_ids: ids of the messages to move.
        current_folder: folder that currently holds the messages.
        new_folder: destination folder.
    """
    if not msg_ids:
        return
    logger.debug('establishing connection to {}:{}'.format(IMAP_SERVER, IMAP_PORT))
    server = imapclient.IMAPClient(host=IMAP_SERVER, port=IMAP_PORT)
    try:
        logger.debug('logging in as ' + env['DIFFBOT_ADDRESS'])
        server.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
        server.select_folder(current_folder)
        logger.debug('moving {} messages from {} to {}'.format(len(msg_ids), current_folder, new_folder))

        for msg_ids_chunk in chunked(msg_ids, 1000):
            # Copy first so a failure never deletes a message that has not
            # reached the destination folder.
            server.copy(msg_ids_chunk, new_folder)
            server.delete_messages(msg_ids_chunk)
            server.expunge(msg_ids_chunk)
    finally:
        # Always end the session, even when a step above fails.
        server.logout()
Example #11
0
def get_raw_mail(unseen=True, read_only=False, config=get_default_imap_config()):
    """Fetch the full raw message and envelope of mail in the configured folder.

    Args:
        unseen: when True only ``UNSEEN`` messages are fetched, otherwise
            ``ALL`` messages.
        read_only: when True the folder is selected read-only.
        config: mapping with ``host``, ``port``, ``user``, ``pass`` and
            ``folder`` keys.

    Returns:
        dict: the merged ``server.fetch`` results (``BODY[]`` and
        ``ENVELOPE``) for every matching message id, fetched 1000 at a time.

    Raises:
        Exception: any error while connecting, logging in, searching or
        fetching is logged at critical level and re-raised.
    """
    host = config['host']
    port = config['port']
    user = config['user']
    password = config['pass']
    folder = config['folder']
    # "new" describes the UNSEEN search; an ALL search is not limited to it.
    scope = 'new ' if unseen else ''
    logger.info('getting {}mail from {}'.format(scope, user))
    try:
        logger.debug('establishing connection to {}:{}'.format(host, port))
        server = imapclient.IMAPClient(host=host, port=port)
        try:
            logger.debug('logging in as ' + user)
            server.login(user, password)
            logger.debug('selecting {}'.format(folder))
            # Honour the caller's read_only request.
            server.select_folder(folder, readonly=read_only)

            logger.debug('polling for mail')
            msg_ids = server.search('UNSEEN' if unseen else 'ALL')
            logger.debug("{} {}emails found".format(len(msg_ids), scope))

            logger.debug('fetching msg data')
            msg_data = {}
            for msg_ids_chunk in chunked(msg_ids, 1000):
                msg_data.update(server.fetch(msg_ids_chunk, ['BODY[]', 'ENVELOPE']))
        except Exception:
            # Best-effort cleanup so a failed run does not leak the
            # connection; a cleanup failure must not hide the real error.
            try:
                server.logout()
            except Exception as cleanup_error:
                logger.debug('logout during error cleanup failed: {}'.format(cleanup_error))
            raise

        server.logout()
        return msg_data
    except Exception as e:
        logger.critical(e)
        raise
Example #12
0
def time_match(t1, t2):
    """Tell whether two time values lie within the configured error margin.

    The margin is ``MINUTES_TIME_MATCH_ERROR * 60``; the comparison is
    strict, so a difference exactly equal to the margin is not a match.

    Args:
        t1: first time value.
        t2: second time value.

    Returns:
        bool: True when the values differ by less than the margin in
        either direction.
    """
    margin = MINUTES_TIME_MATCH_ERROR * 60
    # One absolute difference covers both orderings of t1 and t2.
    within_margin = abs(t1 - t2) < margin
    qualifier = '' if within_margin else 'not '
    logger.debug('comparing {} and {}, {}within {} margin'.format(
        t1, t2, qualifier, margin))
    return within_margin