Ejemplo n.º 1
0
def get_zd_updates(start_time):
    """Pull incremental ticket events from Zendesk starting at ``start_time``.

    Follows the incremental export's ``next_page`` cursor until the API
    reports ``end_of_stream``, accumulating every ticket event seen.

    :param start_time: epoch timestamp to begin the incremental export at
    :return: ``(end_time, ticket_events)`` -- ``end_time`` is the cursor for
             the next poll (falls back to ``start_time`` when the API returns
             ``None``), ``ticket_events`` is a list of event dicts
    :raises AssertionError: if Zendesk responds with a non-200 status
    """
    url_template = 'https://archivesupport.zendesk.com/api/v2/incremental/ticket_events.json?start_time={}&include=comment_events'
    next_page = url_template.format(start_time)
    session = requests.session()

    ticket_events = []
    while True:
        logger.info(
            'getting incremental ticket updates from Zendesk since {}'.format(
                start_time))
        response = session.get(next_page,
                               auth=HTTPBasicAuth(
                                   env['ZENDESK_AGENT_ACCOUNT'] + "/token",
                                   env['ZENDESK_API_KEY']))
        # explicit check instead of `assert`: asserts vanish under `python -O`.
        # AssertionError is kept so existing callers see the same exception type.
        if response.status_code != 200:
            raise AssertionError("{}: {}".format(response.status_code,
                                                 response.content))
        data = response.json()
        ticket_events.extend(data['ticket_events'])

        if data['end_of_stream']:
            break
        next_page = data['next_page']
        start_time = data['end_time']

    logger.info('found {} zendesk updates'.format(len(ticket_events)))

    # the final page's end_time can be None; fall back to the last cursor used
    end_time = data['end_time']
    if end_time is None:
        end_time = start_time

    return end_time, ticket_events
Ejemplo n.º 2
0
def get_raw_mail(unseen=True, read_only=False, config=None):
    """Fetch raw message data from an IMAP mailbox.

    :param unseen: if True fetch only UNSEEN messages, otherwise all messages
    :param read_only: unused here; kept for interface compatibility
    :param config: dict with 'host', 'port', 'user', 'pass', 'folder' keys;
                   defaults to ``get_default_imap_config()``
    :return: dict mapping msg id -> {b'BODY[]': ..., b'ENVELOPE': ...}
    """
    # resolve the default lazily: a call in the signature is evaluated exactly
    # once at import time and the result shared across every call
    if config is None:
        config = get_default_imap_config()
    host = config['host']
    port = config['port']
    user = config['user']
    password = config['pass']
    folder = config['folder']
    # BUG FIX: the conditional was inverted -- UNSEEN messages are the 'new' ones
    logger.info('getting {}mail from {}'.format('new ' if unseen else '', user))
    try:
        logger.debug('establishing connection to {}:{}'.format(host, port))
        server = imapclient.IMAPClient(host=host, port=port)
        logger.debug('logging in as ' + user)
        server.login(user, password)
        logger.debug('selecting {}'.format(folder))
        server.select_folder(folder)
        try:
            logger.debug('polling for mail')
            msg_ids = server.search('UNSEEN' if unseen else 'ALL')
            logger.debug("{} {}emails found".format(len(msg_ids), 'new ' if unseen else ''))

            logger.debug('fetching msg data')
            msg_data = {}
            # chunked fetches keep individual IMAP commands bounded
            for msg_ids_chunk in chunked(msg_ids, 1000):
                msg_data.update(server.fetch(msg_ids_chunk, ['BODY[]', 'ENVELOPE']))
            return msg_data
        finally:
            # always log out once logged in, even when a fetch fails
            server.logout()
    except Exception as e:
        logger.critical(e)
        raise  # bare raise preserves the original traceback
Ejemplo n.º 3
0
def get_msgs(folders):
    """Fetch body text, envelope and internal date for every message in each
    of the given IMAP folders.

    :param folders: iterable of folder names to pull messages from
    :return: {folder: {msg_id: {b'BODY[TEXT]': ..., b'ENVELOPE': ...,
             b'INTERNALDATE': ...}}}
    """
    try:
        logger.debug('establishing connection to {}:{}'.format(IMAP_SERVER, IMAP_PORT))
        server = imapclient.IMAPClient(host=IMAP_SERVER, port=IMAP_PORT)
        logger.debug('logging in as ' + env['DIFFBOT_ADDRESS'])
        server.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
        try:
            data = {}
            for folder in folders:
                logger.debug('selecting {}'.format(folder))
                server.select_folder(folder)
                logger.debug('polling for mail')
                msg_ids = server.search('ALL')
                logger.info("{} emails found".format(len(msg_ids)))

                logger.debug('fetching msg data')
                msg_data = {}
                # chunked fetches keep individual IMAP commands bounded
                for msg_ids_chunk in chunked(msg_ids, 1000):
                    logger.info('getting msg data for ids [{}... {}]'.format(msg_ids_chunk[0], msg_ids_chunk[-1]))
                    msg_data.update(server.fetch(msg_ids_chunk, ['BODY[TEXT]', 'ENVELOPE', 'INTERNALDATE']))

                data[folder] = msg_data
            return data
        finally:
            # log out even when a fetch fails part-way through (the original
            # leaked the connection on any exception after login)
            server.logout()
    except Exception as e:
        logger.critical(e)
        raise  # re-raise without resetting the traceback
Ejemplo n.º 4
0
def send_mail(sender, receiver, subject, body, html_body=None, cc=None, config=None):
    """Send a (possibly multipart/alternative) email over SMTP-over-SSL.

    :param sender: value for the From: header
    :param receiver: primary recipient address
    :param subject: subject line
    :param body: plain-text body
    :param html_body: optional HTML alternative body
    :param cc: optional list of Cc addresses
    :param config: dict with 'host', 'port', 'user', 'pass' keys;
                   defaults to ``get_default_smtp_config()``
    """
    # resolve the default lazily instead of calling it in the signature,
    # where it would be evaluated only once at import time
    if config is None:
        config = get_default_smtp_config()
    host = config['host']
    port = config['port']
    user = config['user']
    password = config['pass']
    logger.info('sending email from {} to {} over {}:{}'.format(sender, receiver, host, port))
    logger.debug('composing message headers')

    msg = MIMEMultipart('alternative')
    msg['From'] = sender
    msg['To'] = receiver
    if isinstance(cc, list):
        msg['Cc'] = ', '.join(cc)
    msg['Subject'] = subject

    # in multipart/alternative the LAST part is the preferred one (RFC 2046),
    # so attach plain text first and HTML last -- the original order caused
    # spec-compliant clients to show the plain version even when HTML existed
    text_part = MIMEText(body, 'plain')
    msg.attach(text_part)
    if html_body is not None:
        html_part = MIMEText(html_body, 'html')
        msg.attach(html_part)

    try:
        logger.debug('establishing connection to {}:{}'.format(host, port))
        # context manager guarantees QUIT/close (the original never closed
        # the connection)
        with smtplib.SMTP_SSL(host=host, port=port) as server:
            logger.debug('logging in as ' + user)
            server.login(user, password)
            logger.debug('sending')
            # BUG FIX: Cc addresses were only placed in the header, never in
            # the SMTP envelope, so Cc recipients never received the mail
            recipients = [receiver] + (cc if isinstance(cc, list) else [])
            # envelope-from stays config['user'] as before (likely for
            # SPF/auth reasons -- the From: header carries `sender`)
            server.sendmail(config['user'], recipients, msg.as_string())
    except Exception as e:
        logger.critical(e)
        raise  # keep the original traceback
Ejemplo n.º 5
0
def og_diff_pools(dict1, dict2, sort_key, within_error, is_match):
    """
    Match items between two pools using a sliding window over sorted order.

    Both pools are sorted by ``sort_key``; a lower-bound index into the
    second pool advances as the first pool is scanned, so each item only
    compares against candidates inside the error window (avoids an O(n*m)
    all-pairs comparison).

    :param dict1: first pool of items
    :param dict2: second pool of items
    :param sort_key: function for sorting items
    :param within_error: func whether items are within error margin w/respect to sort attr.
    :param is_match: func whether items are match
    :return: four tuple: matched/unmatched from dict1/dict2
    """

    # decorate and sort: work on (key, value) pairs ordered by sort_key
    aa = sorted(dict1.items(), key=sort_key)
    bb = sorted(dict2.items(), key=sort_key)

    j0 = 0  # the 'lower bound' index in bb for matching

    a_matched = {}
    a_unmatched = {}
    b_matched = {}
    b_unmatched = {}

    for i, kv_pair in enumerate(aa):
        logger.info('Searching for match for item {}/{}'.format(i, len(aa)))
        ka, a = kv_pair

        # move the lower bound up: bb items permanently fallen behind the
        # window can never match any later aa item (aa is sorted too)
        while j0 < len(bb):
            # once we are within error, stop increasing
            if within_error(a, bb[j0][1]):
                break
            # if we get beyond our aa item, stop increasing
            if sort_key(kv_pair) < sort_key(bb[j0]):
                break
            # any unmatched bb items at j0 at this point are unmatched
            if not bb[j0][0] in b_matched:
                b_unmatched[bb[j0][0]] = bb[j0][1]
            j0 += 1

        # scan forward from the lower bound looking for the first match
        j = j0
        while True:
            # don't run off the end of bb
            if j == len(bb):
                a_unmatched[ka] = a
                break
            # skip bb items already matched
            elif bb[j][0] in b_matched:
                pass
            # if bb item is beyond error, then no match for aa item
            elif not within_error(a, bb[j][1]):
                a_unmatched[ka] = a
                break
            # we got a match
            elif is_match(a, bb[j][1]):
                a_matched[ka] = a
                b_matched[bb[j][0]] = bb[j][1]
                break
            # try the next item in bb
            j += 1

    # NOTE(review): bb items at or beyond the final lower bound that never
    # matched are NOT added to b_unmatched here -- confirm callers don't
    # rely on b_unmatched being exhaustive
    return a_matched, a_unmatched, b_matched, b_unmatched
Ejemplo n.º 6
0
def consult():
    """
    Webhook endpoint: email one or more consultants about a Zendesk ticket.

    payload structure:
        {
            "ticket_id": Int,
            "consultant": "<consultant>@archive.org, ...",
            "subject": String,
            "body": String,
            "html_body": String,
        }
    :return: (flask json response, http status code)
    """
    logger.info("{} request from {}".format(request.method, request.origin))
    auth = request.authorization
    if auth is None:
        message = "Provide basic auth to use this service."
        logger.error(message)
        return jsonify({"Error": message}), 401
    if (auth['username'] != env['ZENDESK_TRIGGER_USERNAME']
            or auth['password'] != env['ZENDESK_TRIGGER_PASSWORD']):
        message = "Invalid Username/Password"
        logger.error(message)
        return jsonify({"Error": message}), 401

    # `payload` instead of `json`: the old name shadowed the stdlib module name
    try:
        payload = request.get_json()
    except BadRequest:
        message = "Bad Request: Could not parse json object"
        logger.error(message)
        return jsonify({"Error": message}), 400
    # get_json() can also return None (e.g. wrong content type) without
    # raising; guard it so the key check below doesn't raise TypeError
    if payload is None:
        message = "Bad Request: Could not parse json object"
        logger.error(message)
        return jsonify({"Error": message}), 400

    # verify correct keys
    required_keys = ['consultant', 'subject', 'body', 'html_body', 'ticket_id']
    if any(k not in payload for k in required_keys):
        logger.error("Invalid data keys")
        return jsonify({
            "Error":
            "Json object must contain the following non-optional keys",
            "keys":
            ["consultant", "subject", "body", "html_body", "ticket_id"]
        }), 400

    # send mail
    body = INTERNAL_MESSAGE_PLAIN + payload['body']
    html_body = INTERNAL_MESSAGE_HTML + payload['html_body']
    # payload may contain literal backslash-n sequences; convert them to
    # real newlines before mailing
    body = body.replace('\\n', '\n')
    html_body = html_body.replace('\\n', '\n')
    # 'consultant' is a comma-separated address list; mail each one
    for consultant in payload['consultant'].replace(' ', '').split(','):
        mail.send_mail(
            sender='{} <{}>'.format(MAILBOT_NAME, env['MAILBOT_ADDRESS']),
            receiver=consultant,
            subject=SUBJECT_PATTERN.format(payload['ticket_id'],
                                           payload['subject']),
            body=body,
            html_body=html_body,
            cc=['{} <{}>'.format(MAILBOT_CC_NAME, env['MAILBOT_CC_ADDRESS'])])
    return jsonify({"Success": "Consultant has been emailed"}), 200
Ejemplo n.º 7
0
def update_ticket(ticket_id, body, sender, public=True):
    """Post a comment to a Zendesk ticket.

    :param ticket_id: id of the ticket to update
    :param body: comment text
    :param sender: (name, address) pair; public comments are signed with the
                   first word of the name, private ones with "name <address>"
    :param public: whether the comment is visible to the requester
    :raises TicketUpdateException: when the API does not return 200
    """
    import copy

    signed = sender[0].split(' ')[0] if public else "{} <{}>".format(*sender)
    comment = comment_template.format(body=body, signed=signed)
    # BUG FIX: deep-copy the shared template -- assigning it directly aliased
    # (and mutated) the module-level dict, leaking state between calls
    payload = copy.deepcopy(payload_template_dict)
    payload['ticket']['comment']['body'] = comment
    payload['ticket']['comment']['public'] = public
    response = post_ticket_update(ticket_id, payload)
    if response.status_code != 200:
        # BUG FIX: response.content is bytes; 'str' + bytes raises TypeError
        logger.info('ZD API: {}'.format(response.content))
        raise TicketUpdateException()
Ejemplo n.º 8
0
def parse_emails_response(support_emails_response):
    """Parse raw IMAP FETCH responses into decorated comment triples.

    :param support_emails_response: list of (header_bytes, message_bytes)
        pairs as produced by imaplib uid('fetch', ...), possibly with stray
        b')' entries interleaved
    :return: list of (timestamp, body_text, msg_uid) triples
    """
    # these configurations match what we get from zendesk
    html2text.config.IGNORE_TABLES = True
    html2text.config.IGNORE_IMAGES = False
    h = html2text.HTML2Text()
    h.body_width = 0
    h.ignore_links = True

    # raw bytes literal: b"\d" is an invalid escape sequence in a plain literal
    id_pattern = re.compile(rb".*UID (\d+) .*")

    # collect decorated messages [(timestamp, comment, id)...]
    support_decorated_comments = []
    logger.info('parsing archive support email data')
    for li in support_emails_response:

        # weird case – something isn't implemented properly in the libraries
        if li == b')':
            continue

        id_bytes, msg_bytes = li
        msg = BytesParser(policy=policy.default).parsebytes(msg_bytes)

        # get id; skip responses whose header doesn't carry a UID rather
        # than crashing on .group() of a None match
        id_match = re.match(id_pattern, id_bytes.strip())
        if id_match is None:
            logger.error('could not extract UID from fetch response header')
            continue
        msg_id = int(id_match.group(1))

        # get time stamp from the last Received: hop (text after the ';')
        time_str = msg['Received'].split(';')[-1].strip()
        time_stamp = parser.parse(time_str).timestamp()

        # get message body: prefer the plain part, fall back to converted html
        raw = msg.get_body(preferencelist=('plain', ))
        if raw is not None:
            try:
                body = raw.get_content()
            except LookupError as e:  # unknown/unsupported charset
                logger.error(e)
                continue
        else:
            raw = msg.get_body(preferencelist=('html', ))
            if raw is None:
                logger.error('Found message with no plain or html body')
                continue
            try:
                html_content = raw.get_content()
            except LookupError as e:
                logger.error(e)
                continue
            body = h.handle(html_content)

        support_decorated_comments.append((time_stamp, body, msg_id))

    return support_decorated_comments
Ejemplo n.º 9
0
def run():
    """Main diff loop: forever, pull new Zendesk ticket comments and support
    mailbox emails, match them against each other, then clean up matched and
    stale items before sleeping for DIFFBOT_LOOP_WAIT_PERIOD seconds.
    """
    zd_still_fresh_filename = 'zd_still_fresh.pickle'
    # resume from the cursor / carry-over triples persisted by the last run
    start_time, still_fresh_zd_triples = get_still_fresh_zd_triples(
        zd_still_fresh_filename)

    while True:
        logger.info('START CYCLE')
        end_time, ticket_events = get_zd_updates(start_time)
        start_time = end_time
        zd_decorated_comments = process_events(ticket_events)
        # NOTE(review): still_fresh_zd_triples is the value loaded at startup
        # and is re-added every cycle -- looks like it should only seed the
        # first iteration; confirm before changing
        zd_decorated_comments += still_fresh_zd_triples

        support_emails_response = get_support_emails()
        support_decorated_comments = parse_emails_response(
            support_emails_response)

        results = diff.match_msgs(zd_decorated_comments,
                                  support_decorated_comments)

        logger.info("zd_matched: {}/{}".format(len(results[0]),
                                               len(zd_decorated_comments)))
        logger.info("archive_matched: {}/{}".format(
            len(results[2]), len(support_decorated_comments)))

        cleanup(*results, zd_still_fresh_filename, start_time)

        logger.info('END CYCLE - sleeping for {} seconds'.format(
            DIFFBOT_LOOP_WAIT_PERIOD))
        time.sleep(DIFFBOT_LOOP_WAIT_PERIOD)
Ejemplo n.º 10
0
def run():
    """Poll the mailbox once and apply each message as a ticket update.

    Messages that can't be tied to a ticket trigger a rejection reply to the
    sender; failed ticket updates are logged and skipped.
    """
    msgs_data = mail.get_raw_mail()

    # iterate items() instead of keys()+indexing (removed dead commented-out
    # setup code that get_raw_mail() now encapsulates)
    for msg_id, msg_datum in msgs_data.items():
        logger.info("parsing message")
        ticket, body, sender, reply_all = parse_msg_data(msg_datum)
        if ticket is None:
            logger.info("sending rejection message")
            send_rejection(sender[1])
            continue
        logger.info("updating ticket")
        try:
            update_ticket(ticket, body, sender, reply_all)
        except TicketUpdateException as e:
            logger.error(e)
            continue
        logger.info(
            "ticket updated – ID: {}, Body: {}, Sender: {}, Public: {}".format(
                ticket,
                body.partition('\n')[0][:100], sender, reply_all))
Ejemplo n.º 11
0
def comment_match(c1, c2):
    """Fuzzy-compare two comment bodies; True when they likely match.

    Strips non-word characters, truncates both to a common length (Zendesk
    truncates long comments), then escalates: exact equality -> difflib
    quick ratio -> full diff-match-patch levenshtein-based ratio, with
    COMMENT_MATCH_THRESHOLD as the cutoff at each fuzzy stage.
    """
    # strip every NON-WORD character (not just whitespace, as the old comment
    # claimed); raw strings -- '\w' in a plain literal is an invalid escape
    c1 = re.sub(r'[^\w]', '', c1)
    c2 = re.sub(r'[^\w]', '', c2)

    # zendesk seems to truncate comment size. This is a good-enough solution
    c1 = c1[:len(c2)]
    c2 = c2[:len(c1)]

    # could be an easy out
    if c1 == c2:
        logger.info('COMPLETE MATCH')
        return True

    # preliminary check: quick_ratio is a cheap upper bound on similarity
    matcher = difflib.SequenceMatcher(isjunk=lambda c: c in ' \n\r\t')
    matcher.set_seqs(c1, c2)
    qr = matcher.quick_ratio()
    if qr < COMMENT_MATCH_THRESHOLD:
        logger.debug(
            'quick ratio: {} - "{}..." and "{}..." don\'t match'.format(
                qr, c1[:30], c2[:30]))
        return False

    # full check: levenshtein distance normalized into a similarity ratio
    dmp = dmp_module.diff_match_patch()
    diff = dmp.diff_main(c1, c2)
    d = dmp.diff_levenshtein(diff)
    ratio = 1 - d / max(len(c1), len(c2))
    verdict = ratio > COMMENT_MATCH_THRESHOLD
    if verdict:
        logger.info('full ratio: {} - "{}..." and "{}..." FULL MATCH'.format(
            round(ratio, 4), c1[:30], c2[:30]))
        logger.debug('DIFF:\n{}'.format(diff))
    elif ratio > COMMENT_MATCH_THRESHOLD * 0.2:
        logger.debug(
            'not close to match with full ratio {}: \n\nDIFF:\n{}'.format(
                round(ratio, 4), diff))
    else:
        logger.debug('full ratio: {} - "{}..." and "{}..." no match'.format(
            round(ratio, 4), c1[:30], c2[:30]))
    return verdict
Ejemplo n.º 12
0
def text_match(zd_msg, archive_msg, threshold=0.90):
    """Fuzzy-compare the body text of two IMAP messages.

    :param zd_msg: fetch dict with b'ENVELOPE' and b'BODY[TEXT]' entries
    :param archive_msg: same shape as zd_msg
    :param threshold: similarity ratio above which the bodies count as a match
    :return: True when bodies are identical or similar beyond threshold
    """
    zd_subject = zd_msg[b'ENVELOPE'].subject
    if zd_subject is None:
        logger.warning('found message from zendesk with no subject')
    zd_subject = '' if zd_subject is None else zd_subject.decode()
    archive_subject = archive_msg[b'ENVELOPE'].subject
    if archive_subject is None:
        logger.warning('found message from archive with no subject')
    archive_subject = '' if archive_subject is None else archive_subject.decode(
    )
    try:
        zd_text = zd_msg[b'BODY[TEXT]'].decode()
        archive_text = archive_msg[b'BODY[TEXT]'].decode()
    except (KeyError, AttributeError, UnicodeDecodeError):
        # narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit and hid unrelated bugs
        logger.error('found msg with no body. subject: "{}" or "{}"'.format(
            zd_subject, archive_subject))
        return False

    # Cut the shit
    if zd_text == archive_text:
        logger.info('COMPLETE MATCH')
        return True

    # Preliminary check: quick_ratio is a cheap upper bound on similarity
    matcher = difflib.SequenceMatcher(isjunk=lambda c: c in ' \n\r\t')
    matcher.set_seqs(zd_text, archive_text)
    qr = matcher.quick_ratio()
    if qr < threshold:
        logger.debug('quick ratio: {} - "{}" and "{}" don\'t match'.format(
            qr, zd_subject, archive_subject))
        return False

    # Full check: levenshtein distance normalized into a similarity ratio
    dmp = dmp_module.diff_match_patch()
    dmp.Diff_Timeout = 0.2
    diff = dmp.diff_main(zd_text, archive_text)
    d = dmp.diff_levenshtein(diff)
    ratio = 1 - d / max(len(zd_text), len(archive_text))
    verdict = ratio > threshold
    if verdict:
        logger.info('full ratio: {} - "{}" and "{}" FULL MATCH'.format(
            round(ratio, 4), zd_subject, archive_subject))
    else:
        logger.debug('full ratio: {} - "{}" and "{}" no match'.format(
            round(ratio, 4), zd_subject, archive_subject))
    return verdict
Ejemplo n.º 13
0
def process_events(ticket_events):
    """Flatten Zendesk ticket events into (timestamp, comment, ticket_id)
    triples, one per comment child event.

    Logs an error if a single event carries more than one comment child
    (each comment is still collected either way).
    """
    zendesk_comments = []  # [(timestamp, comment, ticket_id)...]

    for event in ticket_events:
        comment_count = 0

        for child in event['child_events']:
            # only comment children carry text we care about
            if child['event_type'].lower() != 'comment':
                continue
            comment_count += 1
            if comment_count > 1:
                logger.error(
                    'found {} comment children in single event'.format(
                        comment_count))
            zendesk_comments.append(
                (event['timestamp'], child['body'], event['ticket_id']))

    logger.info('found {} zendesk comments'.format(len(zendesk_comments)))

    return zendesk_comments
Ejemplo n.º 14
0
def get_support_emails():
    """Fetch raw BODY[] fetch responses for every email in the
    from-archive-accounts folder.

    Exits the process (exit(1)) when the IMAP server refuses a search or
    fetch, matching the original fail-hard behaviour.

    :return: list of raw imaplib fetch response items (possibly empty)
    """
    logger.info('getting archive support emails')
    imap4 = imaplib.IMAP4_SSL(host=IMAP_SERVER, port=IMAP_PORT)
    imap4.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
    try:
        imap4.select(FROM_ARCHIVE_ACCOUNTS_FOLDER)

        status, response = imap4.uid('search', None, 'ALL')
        if status != 'OK':
            logger.error('unable to search email server')
            exit(1)
        # response of the form: [b'1 2 3 4']
        if response[0] == b'':
            logger.info('no support emails to match against')
            return []

        msg_ids = response[0].decode().split(' ')
        logger.info('found {} support emails'.format(len(msg_ids)))
        responses = []
        for msg_ids_chunk in chunked(msg_ids, 1000):
            logger.debug('getting a message chunk')
            status, response = imap4.uid('fetch', ','.join(msg_ids_chunk),
                                         '(BODY[])')
            if status != 'OK':
                logger.error('unable to fetch from email server')
                exit(1)
            responses.extend(response)
        imap4.close()
        return responses
    finally:
        # always log out -- the original leaked the connection on the
        # empty-mailbox early return and on any exception
        imap4.logout()
Ejemplo n.º 15
0
def _move_emails(imap4, msg_ids, dest_folder):
    """Copy the given message UIDs to dest_folder and flag the originals as
    deleted (actual removal happens when the caller EXPUNGEs)."""
    for msg_ids_chunk in chunked(msg_ids, 1000):
        uids = ','.join(msg_ids_chunk)
        result, _ = imap4.uid('COPY', uids, dest_folder)
        if result != 'OK':
            logger.error('unable to copy items')
        # raw string: '\D' in a plain literal is an invalid escape sequence
        result, _ = imap4.uid('STORE', uids, '+FLAGS', r'(\Deleted)')
        if result != 'OK':
            logger.error('unable to delete original versions')


def cleanup(zd_matched, zd_unmatched, archive_matched, archive_unmatched,
            zd_still_fresh_filename, start_time):
    """Post-matching housekeeping.

    - moves matched archive emails to MATCHED_ARCHIVE_FOLDER
    - persists recent unmatched zendesk comments for the next cycle and
      appends stale ones to zd_unmatched.log
    - moves stale unmatched archive emails to UNMATCHED_ARCHIVE_FOLDER

    :param zd_matched: (timestamp, comment, ticket_id) triples (unused here)
    :param zd_unmatched: (timestamp, comment, ticket_id) triples
    :param archive_matched: (timestamp, body, msg_id) triples
    :param archive_unmatched: (timestamp, body, msg_id) triples
    :param zd_still_fresh_filename: pickle file for carry-over state
    :param start_time: cursor timestamp persisted alongside the fresh triples
    """
    imap4 = imaplib.IMAP4_SSL(host=IMAP_SERVER, port=IMAP_PORT)
    imap4.login(env['DIFFBOT_ADDRESS'], env['DIFFBOT_PASSWORD'])
    imap4.select(FROM_ARCHIVE_ACCOUNTS_FOLDER)

    # move matched emails
    archive_matched_ids = [str(msg_id) for _, _, msg_id in archive_matched]
    logger.info('moving {} matched emails'.format(len(archive_matched_ids)))
    _move_emails(imap4, archive_matched_ids, MATCHED_ARCHIVE_FOLDER)

    # deal with unmatched updates from Zendesk: anything younger than the
    # grace period gets another chance next cycle
    cutoff = datetime.datetime.now().timestamp() - MINUTES_GRACE_PERIOD * 60
    still_fresh = []
    old = []
    for triple in zd_unmatched:
        if triple[0] > cutoff:
            still_fresh.append(triple)
        else:
            old.append(triple)
    # save still fresh for later (with-block closes the handle the original
    # pickle.dump(..., open(...)) left dangling)
    logger.info('saving {} zd ticket messages for the next round'.format(
        len(still_fresh)))
    with open(zd_still_fresh_filename, 'wb') as pf:
        pickle.dump((start_time, still_fresh), pf)
    # log old unmatched ticket comments
    logger.info('logging {} old zd ticket messages that went unmatched'.format(
        len(old)))
    with open('zd_unmatched.log', 'a') as f:
        for t, c, t_id in old:
            f.write("""
Ticket #{}
Time: {}
Comment:
{}
""".format(t_id, str(datetime.datetime.fromtimestamp(t)), c))

    # move old emails to unmatched, using the same grace-period cutoff
    old_archive_unmatched_ids = [
        str(msg_id) for t, _, msg_id in archive_unmatched if t < cutoff
    ]
    logger.info('moving {} old unmatched archive emails'.format(
        len(old_archive_unmatched_ids)))
    _move_emails(imap4, old_archive_unmatched_ids, UNMATCHED_ARCHIVE_FOLDER)

    # we're done here: purge flagged messages and disconnect
    imap4.expunge()
    imap4.close()
    imap4.logout()
Ejemplo n.º 16
0
def matching(needles, haystack, sort_key, within_error, is_match):
    """
    Match needle triples against hay triples using a sliding window over two
    lists sorted by ``sort_key`` (both lists are sorted IN PLACE).

    Triples are (timestamp, comment, id).

    :param needles: items to be matched
    :param haystack: candidate matches
    :param sort_key: function for sorting items
    :param within_error: func whether items are within error margin w/respect to sort attr.
    :param is_match: func whether items are match
    :return: four tuple of sets: matched needles, unmatched needles,
             matched hay, unmatched hay
    """

    needles.sort(key=sort_key)
    haystack.sort(key=sort_key)

    j0 = 0  # the 'lower bound' index in bb for matching

    matched_needles = set()
    unmatched_needles = set()
    matched_hay = set()
    unmatched_hay = set()

    # import pudb
    # pudb.set_trace()

    for i, triple in enumerate(needles):
        logger.info('Searching for match for item {}/{}'.format(
            i, len(needles)))
        t, c, n = triple

        # move the lower bound up: hay permanently behind the time window can
        # never match any later needle (needles are sorted too)
        while j0 < len(haystack):
            # once we are within error, stop increasing
            if within_error(t, haystack[j0][0]):
                break
            # if we get beyond our needle, stop increasing
            if sort_key(triple) < sort_key(haystack[j0]):
                break
            # any unmatched hay at j0 at this point is unmatched
            if not haystack[j0] in matched_hay:
                unmatched_hay.add(haystack[j0])
            j0 += 1

        j = j0
        MATCH_FOUND = False  # TODO remove
        # scan the window; deliberately does NOT break on first match so a
        # needle can absorb duplicate emails in the haystack (see TODO below)
        while True:
            # don't run off the end of haystack
            if j == len(haystack):
                # TODO
                if not MATCH_FOUND:
                    unmatched_needles.add(triple)
                break
            # skip hay already matched
            elif haystack[j] in matched_hay:
                pass
            # if hay is beyond error, then no match for needle
            elif not within_error(t, haystack[j][0]):
                # TODO
                if not MATCH_FOUND:
                    unmatched_needles.add(triple)
                break
            # we got a match
            elif is_match(c, haystack[j][1]):
                matched_needles.add(triple)
                matched_hay.add(haystack[j])
                # TODO: bring this break back when we know that we don't have duplicate emails
                MATCH_FOUND = True
                # break
            # try the next hay
            j += 1

    # NOTE(review): hay at or beyond the final lower bound that never matched
    # is NOT added to unmatched_hay -- confirm callers don't rely on
    # unmatched_hay being exhaustive
    return matched_needles, unmatched_needles, matched_hay, unmatched_hay