Esempio n. 1
0
def get_messages_for_cleaning(user_email=None, process_id=None):
    """Collect the IDs of the messages a cleaning process has to handle.

    Logs into the mailbox of ``user_email`` with the password stored on the
    ``CleanUserProcess`` identified by ``process_id``, lists the messages
    matching the process search criteria (requested with
    ``only_with_attachments=True`` and ``not_migrated=True``) and adds the
    number found to the ``cleaning_<user_email>_total_count`` counter in the
    ``str(process_id)`` namespace.

    Args:
        user_email: Address of the mailbox to inspect.
        process_id: ID of the ``CleanUserProcess`` driving the cleaning.

    Returns:
        ``[msg_ids]`` (all IDs in one single chunk) when at least one
        message was found. Otherwise the process is marked
        ``constants.FINISHED`` and an empty list is returned.
    """
    clean_process = CleanUserProcess.get_by_id(process_id)
    imap = IMAPHelper()
    imap.login(email=user_email, password=clean_process.source_password)
    try:
        msg_ids = imap.list_messages(
            criteria=clean_process.search_criteria,
            only_with_attachments=True, not_migrated=True)
    finally:
        # Release the connection even when listing fails.
        imap.close()

    # The total is recorded in both outcomes (delta is 0 when nothing
    # was found).
    counter.load_and_increment_counter(
        'cleaning_%s_total_count' % user_email,
        delta=len(msg_ids),
        namespace=str(process_id))

    if not msg_ids:
        # Nothing to clean: the process is already complete.
        clean_process.status = constants.FINISHED
        clean_process.put()
        return []

    # All IDs go out as one chunk. Splitting them into up to
    # constants.USER_CONNECTION_LIMIT interleaved chunks (msg_ids[i::n]) was
    # left disabled by the original author, whose note cites the migration
    # API 1 QPS limit and leaves open whether the optimization should be used.
    return [msg_ids]
Esempio n. 2
0
def schedule_user_cleaning(user_email=None, process_id=None):
    """Look up the messages to clean and defer one task per chunk.

    The chunks come from ``get_messages_for_cleaning``. Their combined size
    is stored in ``number_of_messages`` on the ``CleanUserProcess``, then
    every non-empty chunk is handed to ``clean_messages`` through
    ``deferred.defer``.

    Args:
        user_email: Address of the mailbox being cleaned.
        process_id: ID of the ``CleanUserProcess`` to update.
    """
    chunks = get_messages_for_cleaning(
        user_email=user_email, process_id=process_id)
    total_messages = sum(len(ids) for ids in chunks)

    clean_process = CleanUserProcess.get_by_id(process_id)
    clean_process.number_of_messages = total_messages
    clean_process.put()

    for ids in chunks:
        if len(ids) <= 0:
            # Empty chunks produce no task.
            continue
        logging.info('Scheduling user [%s] messages cleaning', user_email)
        deferred.defer(clean_messages, user_email=user_email,
                       chunk_ids=ids,
                       process_id=process_id)
Esempio n. 3
0
def clean_messages(user_email=None, password=None, chunk_ids=None,
                   retry_count=0, process_id=None):
    """Clean every message of a chunk, rescheduling the rest on failure.

    Opens an IMAP session with the password stored on the
    ``CleanUserProcess``, prepares the Drive folder
    ``constants.ATTACHMENT_FOLDER/<user_email>`` and a migration client, and
    calls ``clean_message`` for each ID, updating the
    ``cleaning_<user_email>_ok_count`` / ``..._error_count`` counters.

    When ``clean_message`` raises, the loop stops and a new task is deferred:
      * while ``retry_count < constants.MAX_CLEAN_RETRIES`` the task gets all
        IDs not yet cleaned (the failing one included) and
        ``retry_count + 1``;
      * afterwards the failing ID is dropped and counted as an error, and the
        task gets the other uncleaned IDs with the default ``retry_count``.

    Args:
        user_email: Address of the mailbox being cleaned.
        password: Unused; kept for interface compatibility (the password is
            read from the process entity).
        chunk_ids: Message IDs to clean. ``None`` is treated as empty.
        retry_count: Number of retries already spent on this chunk.
        process_id: ID of the ``CleanUserProcess``.

    Returns:
        ``True`` when ``chunk_ids`` is empty (the process is then marked
        ``constants.FINISHED``); ``None`` otherwise.

    Raises:
        Exception: Whatever the setup steps (process lookup, IMAP login,
            Drive or migration authentication) raise, re-raised after
            logging.
    """
    if chunk_ids is None:
        # Avoid sharing one mutable default list between calls.
        chunk_ids = []

    cleaned_successfully = []
    if len(chunk_ids) <= 0:
        process = CleanUserProcess.get_by_id(process_id)
        process.status = constants.FINISHED
        process.put()
        return True

    # Pre-bind so the ``finally`` clause cannot hit an unbound name (and
    # mask the real error) when setup fails early.
    imap = None
    process = None
    try:
        process = CleanUserProcess.get_by_id(process_id)
        imap = IMAPHelper()
        imap.login(email=user_email, password=process.source_password)
        imap.select()

        domain_name = user_email.split('@')[1]
        primary_domain = PrimaryDomain.get_or_create(
                domain_name)

        try:
            drive = DriveHelper(credentials_json=primary_domain.credentials,
                                admin_email=primary_domain.admin_email,
                                refresh_token=primary_domain.refresh_token)
            folder = drive.get_folder(constants.ATTACHMENT_FOLDER)
            if not folder:
                folder = drive.create_folder(constants.ATTACHMENT_FOLDER)
            sub_folder = drive.get_folder(user_email)
            if not sub_folder:
                sub_folder = drive.create_folder(user_email,
                                                 [{'id': folder['id']}])
        except Exception:
            logging.error(
                "Couldn't authenticate drive for user %s" % user_email)
            # Bare raise keeps the original traceback.
            raise

        try:
            migration = MigrationHelper(
                credentials_json=primary_domain.credentials,
                refresh_token=primary_domain.refresh_token)
        except Exception:
            logging.error(
                "Couldn't authenticate migration api for user %s" % user_email)
            raise

        for message_id in chunk_ids:
            try:
                result = clean_message(msg_id=message_id, imap=imap,
                                       drive=drive,
                                       migration=migration,
                                       folder_id=sub_folder['id'],
                                       user_email=user_email,
                                       process_id=process_id)
                if result:
                    counter.load_and_increment_counter(
                        'cleaning_%s_ok_count' % (user_email),
                        namespace=str(process_id))
                    cleaned_successfully.append(message_id)
                else:
                    counter.load_and_increment_counter(
                        'cleaning_%s_error_count' % user_email,
                        namespace=str(process_id))
                    logging.error(
                        'Error cleaning message ID [%s] for user [%s]: [%s] ',
                        message_id, user_email, result)
            except Exception:
                logging.exception(
                    'Failed cleaning individual message ID [%s] for user [%s]',
                    message_id, user_email)
                if retry_count < constants.MAX_CLEAN_RETRIES:
                    # Retry everything not cleaned yet, including the
                    # message that just failed.
                    remaining = [chunk_msg for chunk_msg in chunk_ids
                                 if chunk_msg not in cleaned_successfully]
                    logging.info(
                        'Scheduling [%s] remaining cleaning messages for user [%s]',
                        len(remaining), user_email)
                    deferred.defer(clean_messages, user_email=user_email,
                                   chunk_ids=remaining,
                                   process_id=process_id,
                                   retry_count=retry_count + 1)
                else:
                    # Retries exhausted: drop the failing message and carry
                    # on with the others (retry_count starts over).
                    remaining = [chunk_msg for chunk_msg in chunk_ids
                                 if chunk_msg != message_id and
                                 chunk_msg not in cleaned_successfully]
                    logging.info(
                        'Giving up cleaning message [%s] for '
                        'user [%s]', message_id,
                        user_email)
                    counter.load_and_increment_counter(
                        'cleaning_%s_error_count' % user_email,
                        delta=1,
                        namespace=str(process_id))
                    deferred.defer(clean_messages, user_email=user_email,
                                   chunk_ids=remaining,
                                   process_id=process_id)
                # The deferred task owns the rest of the chunk.
                break

    except Exception:
        logging.exception('Failed cleaning messages chunk')
        raise
    finally:
        if imap:
            imap.close()
        # Mark the process finished when the chunk is small or fewer than 10
        # of its messages are left uncleaned.
        # NOTE(review): this also fires when a retry task was just deferred;
        # confirm that is intended.
        if process is not None and (
                len(chunk_ids) < 10 or
                (len(cleaned_successfully) + 10 > len(chunk_ids))):
            process.status = constants.FINISHED
            process.put()
Esempio n. 4
0
def _retry_delayed_delete(msg_id, process_id, retries, source_email,
                          error=None):
    """Re-defer ``delayed_delete_message`` or log that retries ran out.

    While ``retries < constants.MAX_RETRIES`` a new task is deferred on the
    "elimination" queue with a countdown of ``60 * 2 ** retries`` seconds;
    otherwise an error is logged (including ``error.message`` when an
    exception is supplied).
    """
    if retries < constants.MAX_RETRIES:
        deferred.defer(delayed_delete_message, msg_id=msg_id,
                       process_id=process_id, retries=retries + 1,
                       _countdown=60 * 2 ** retries, _queue="elimination")
    elif error is None:
        logging.error("Couldn't delete msg %s for user %s" %
                      (msg_id, source_email))
    else:
        logging.error("Couldn't delete msg %s for user %s, error %s" %
                      (msg_id, source_email, error.message))


def delayed_delete_message(msg_id=None, process_id=None,
                           retries=0):
    """Delete the original message once its migrated copy is visible.

    The deletion only goes ahead when the ``CleanMessageProcess`` for
    ``msg_id`` has status ``constants.MIGRATED``, the subject of the
    original can be read, and a message with that subject is found under
    the ``Migrated-Migrados`` label. If any of these checks fails the task
    re-defers itself with exponential backoff until
    ``constants.MAX_RETRIES`` is reached, after which an error is logged.

    After deleting, the message process is marked ``constants.FINISHED`` and
    the parent ``CleanUserProcess`` gets its ``progress`` (number of finished
    messages) and ``latest_activity`` refreshed; it is marked finished too
    when every message process is finished.

    Args:
        msg_id: ID of the message to delete.
        process_id: ID of the owning ``CleanUserProcess``.
        retries: Number of attempts already made.
    """
    process = CleanUserProcess.get_by_id(process_id)
    criteria = process.search_criteria

    msg_process = CleanMessageProcess.query(ndb.AND(
        CleanMessageProcess.msg_id == msg_id,
        CleanMessageProcess.clean_process_id == process_id)
    ).get()

    # A message process that the query does not return is handled like one
    # that is not migrated yet: retry later instead of crashing.
    if msg_process is None or msg_process.status != constants.MIGRATED:
        _retry_delayed_delete(msg_id, process_id, retries,
                              process.source_email)
        return

    imap = IMAPHelper()
    imap.login(process.source_email, process.source_password)
    try:
        imap.select()

        # Look for the migrated email, if it doesn't exist yet
        # retry later
        try:
            subject = imap.get_subject(msg_id=msg_id)
        except Exception as e:
            _retry_delayed_delete(msg_id, process_id, retries,
                                  process.source_email, error=e)
            return

        messages = imap.list_messages(criteria="subject:(%s) label:Migrated-Migrados" % subject)

        if len(messages) < 1:
            _retry_delayed_delete(msg_id, process_id, retries,
                                  process.source_email)
            return

        imap.delete_message(msg_id=msg_id, criteria=criteria)
    finally:
        # Close the connection on every exit path, early returns included.
        imap.close()

    msg_process.status = constants.FINISHED
    msg_process.put()

    all_cleaning_messages = CleanMessageProcess.query(
        CleanMessageProcess.clean_process_id == process_id
    ).fetch()

    progress = sum(1 for message in all_cleaning_messages
                   if message.status == constants.FINISHED)

    if progress == len(all_cleaning_messages):
        process.status = constants.FINISHED

    # Store the current time shifted by the America/Bogota UTC offset,
    # formatted as a string.
    utc_now = datetime.datetime.utcnow()
    local_tz = pytz.timezone('America/Bogota')
    tz_offset = local_tz.utcoffset(utc_now)
    now = utc_now + tz_offset
    process.progress = progress
    process.latest_activity = "%s" % now
    process.put()
Esempio n. 5
0
def clean_message(msg_id='', imap=None, drive=None,
                  migration=None, folder_id=None,
                  user_email=None, process_id=None):
    """Move a message's attachments to Drive and migrate the slimmed copy.

    For every non-multipart MIME part carrying a ``Content-Disposition``
    header, the payload is uploaded to Drive (unless a finished
    ``CleanAttachmentProcess`` already records the upload), ``user_email`` is
    granted writer permission on the file, and the part's payload and headers
    are cleared. One HTML link per uploaded file is then attached to the
    message, the message is sent through the migration helper with its
    original labels, and deletion of the original is deferred to
    ``delayed_delete_message``.

    Args:
        msg_id: ID of the message to clean.
        imap: Logged-in IMAP helper used to read the message and labels.
        drive: Drive helper used to upload files and set permissions.
        migration: Migration helper used to insert the rewritten message.
        folder_id: ID of the Drive folder that receives the attachments.
        user_email: Address of the mailbox owner.
        process_id: ID of the owning ``CleanUserProcess``.

    Returns:
        ``True`` when the message was migrated, or when its
        ``CleanMessageProcess`` was already ``constants.FINISHED``.

    Raises:
        Exception: When the message cannot be read, a Drive upload or
            permission call reports failure, or the migration fails.
    """
    logging.info("Trying to clean message %s for user %s" % (msg_id, user_email))

    msg_process = CleanMessageProcess.get_or_create(msg_id, process_id)
    if msg_process.status == constants.FINISHED:
        return True

    result, message = imap.get_message(msg_id=msg_id)

    if result != 'OK':
        raise Exception("Couldn't read message")

    result, label_data = imap.get_message_labels(msg_id=msg_id)
    labels = []
    if label_data and label_data[0]:
        # Take the whitespace-separated tokens found between the second '('
        # and the following ')'.
        # NOTE(review): presumably the label list of the IMAP response;
        # confirm the response format against IMAPHelper.
        labels = (((label_data[0].split('('))[2].split(')'))[0]).split()
    mail = email.message_from_string(message[0][1])
    attachments = []
    number_of_attachments = 0

    if mail.get_content_maintype() == 'multipart':
        for part in mail.walk():
            if part.get_content_maintype() == 'multipart':
                continue
            if part.get('Content-Disposition') is None:
                continue

            # Is attachment
            number_of_attachments += 1
            attachment_process = CleanAttachmentProcess.get_or_create(
                msg_id, msg_process.key.id(), number_of_attachments
            )

            if (attachment_process.status == constants.FINISHED and
                    attachment_process.url and
                    attachment_process.filename and
                    attachment_process.file_id):
                # Uploaded on an earlier attempt: reuse the stored file.
                attachments.append(
                    (attachment_process.url, attachment_process.filename))
                file_id = attachment_process.file_id
            else:
                attachment = part.get_payload(decode=True)
                mime_type = part.get_content_type()
                filename = part.get_filename()
                # Decode an encoded-word filename when the header declares
                # a charset.
                text, encoding = email.Header.decode_header(filename)[0]
                if encoding:
                    filename = text.decode(encoding)

                insert_result = drive.insert_file(filename=filename,
                                                  mime_type=mime_type,
                                                  content=attachment,
                                                  parent_id=folder_id)
                if not insert_result:
                    attachment_process.error_description = (
                        "Error inserting file"
                    )
                    attachment_process.put()
                    raise Exception("Insert file error")

                drive_url = insert_result['webContentLink']
                file_id = insert_result['id']

                attachment_process.url = drive_url
                attachment_process.file_id = file_id
                attachment_process.status = constants.FINISHED
                attachment_process.filename = filename
                attachment_process.put()

                attachments.append((drive_url, filename))

            permission_result = drive.insert_permission(file_id=file_id,
                                                        value=user_email,
                                                        type='user',
                                                        role='writer')

            if not permission_result:
                attachment_process.error_description = (
                    "Permission error"
                )
                attachment_process.put()
                raise Exception("Permission error")

            # Empty the part: drop its payload and every header.
            part.set_payload("")
            for header in part.keys():
                del part[header]

    msg_process.status = constants.DUPLICATED
    msg_process.put()

    # NOTE(review): url and filename are interpolated into HTML unescaped;
    # consider escaping, since filenames come from the email itself.
    for url, filename in attachments:
        body_suffix = '<a href="%s">%s</a>' % (url, filename)
        new_payload = email.MIMEText.MIMEText(body_suffix.encode('utf-8'), 'html', 'utf-8')
        mail.attach(new_payload)

    # Send new mail
    # NOTE(review): the pause presumably keeps calls under the migration
    # API rate limit; confirm.
    time.sleep(1.2)
    migration_result = migration.migrate_mail(user_email=user_email, msg=mail,
                                              labels=labels)
    if not migration_result:
        msg_process.error_description = "Migration error"
        msg_process.put()
        raise Exception("Migration error")

    msg_process.status = constants.MIGRATED
    msg_process.put()

    # Then delete previous email
    logging.info("Delaying delete of msg %s for user %s" % (msg_id, user_email))
    deferred.defer(delayed_delete_message, msg_id=msg_id,
                   process_id=process_id, _countdown=30, _queue="elimination")

    return True
Esempio n. 6
0
def list_process():
    """Render the cleaning-process page and handle new process submissions.

    On a valid POST, the source credentials are checked with an IMAP login,
    but only when the current user is the admin of the primary domain
    derived from their own address. If the login result is ``'OK'`` a
    ``CleanUserProcess`` is stored with the form values and
    ``schedule_user_cleaning`` is deferred for it; otherwise an error is
    added to the ``source_email`` field.

    In every case the current user's processes are listed one page at a
    time (``constants.PAGE_SIZE`` per page), driven by the ``cursor`` and
    ``prev`` query-string parameters.

    Returns:
        The rendered ``process.html`` template.
    """
    form = CleanUserProcessForm()
    user = users.get_current_user()
    clean_process_saved = False

    clean_processes = []
    clean_process_query = CleanUserProcess.query(
        CleanUserProcess.owner_email == user.email())
    if request.method == 'POST':
        if form.validate_on_submit():
            primary_domain = PrimaryDomain.get_or_create(
                domain_name=user.email().split('@')[1])

            # Stays 'NO' (rejected) unless the domain admin logs in
            # successfully.
            logged_in = 'NO'
            if user.email() == primary_domain.admin_email:
                imap = IMAPHelper()
                logged_in, _ = imap.login(
                    form.data['source_email'], form.data['source_password'])
                imap.close()

            if logged_in != 'OK':
                form.source_email.errors.append(
                    "Can't access the email with those credentials")
            else:
                clean_user_process = CleanUserProcess(
                    owner_email=user.email(),
                    destination_message_email=user.email(),
                    status=constants.STARTED
                )
                # Copy every submitted form field onto the entity.
                for key, value in form.data.iteritems():
                    setattr(clean_user_process, key, value)
                clean_process_key = clean_user_process.put()
                clean_process_saved = True
                # TODO: process does not appears immediately after it's saved
                # launch Pipeline
                deferred.defer(schedule_user_cleaning, user_email=form.data['source_email'],
                               process_id=clean_process_key.id())

    is_prev = request.args.get('prev', False)
    url_cursor = request.args.get('cursor', None)
    cursor = Cursor(urlsafe=url_cursor) if url_cursor else None

    if is_prev:
        # Walking backwards: flip both the sort order and the cursor.
        clean_process_query = clean_process_query.order(
            CleanUserProcess.created)
        if cursor is not None:
            # 'prev' without a cursor used to crash here.
            cursor = cursor.reversed()
    else:
        clean_process_query = clean_process_query.order(
            -CleanUserProcess.created)

    data, next_curs, more = clean_process_query.fetch_page(
        constants.PAGE_SIZE, start_cursor=cursor)
    clean_processes.extend(data)

    if is_prev:
        prev_curs = next_curs.reversed().urlsafe() if more else None
        next_curs = url_cursor
    else:
        prev_curs = url_cursor
        next_curs = next_curs.urlsafe() if more else None

    return render_template('process.html', form=form, user=user.email(),
                           clean_process_saved=clean_process_saved,
                           clean_processes=clean_processes, next_curs=next_curs,
                           more=more, prev_curs=prev_curs)