Exemple #1
0
def submit_extract_keywords_hit(note):
    """Create a Mechanical Turk HIT that asks a worker to
    choose keywords and definitions from the given note.

    The HIT title is built from the note's course name and school name, and
    the overview links to the note via the current site's domain and the
    note's absolute URL. One keyword question and one definition question are
    added for each pair of entries in KEYWORDS_HIT_KEYWORD_FIELDS and
    KEYWORDS_HIT_DEFINITION_FIELDS.

    On success a ``HIT`` row is created that ties the new HIT's id to
    ``note`` with ``processed=False``. If the ``MTURK_HOST`` environment
    variable is not set, a warning is logged and nothing else happens.
    """

    # A missing variable is the only failure expected here, so catch KeyError
    # specifically instead of hiding unrelated errors behind a bare except.
    try:
        MTURK_HOST = os.environ['MTURK_HOST']
    except KeyError:
        logger.warning('Could not find Mechanical Turk secrets, not running submit_extract_keywords_hit')
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY,
                                 host=MTURK_HOST)

    # Courses without a school of their own use their department's school.
    if note.course.school:
        title = KEYWORDS_HIT_TITLE_TEMPLATE.format(course=note.course.name, school=note.course.school.name)
    else:
        title = KEYWORDS_HIT_TITLE_TEMPLATE.format(course=note.course.name, school=note.course.department.school.name)

    overview = Overview()
    overview.append(FormattedContent(KEYWORDS_HIT_OVERVIEW_TEMPLATE.format(domain=Site.objects.get_current(),
                                                                  link=note.get_absolute_url())))

    # The same answer objects are shared by every question of their kind.
    keyword_fta = FreeTextAnswer()
    keyword_fta.num_lines = 1

    definition_fta = FreeTextAnswer()
    definition_fta.num_lines = 3

    question_form = QuestionForm()
    question_form.append(overview)

    # zip() stops at the shorter list, so keywords and definitions always
    # come in pairs. Each field entry is read as [0] = question identifier,
    # [1] = title shown to the worker.
    for i, (keyword_field, definition_field) in enumerate(
            zip(KEYWORDS_HIT_KEYWORD_FIELDS, KEYWORDS_HIT_DEFINITION_FIELDS)):
        keyword_content = QuestionContent()
        keyword_content.append_field('Title', keyword_field[1])
        # NOTE(review): `i <= 10` makes the first eleven keywords required;
        # confirm that eleven (rather than ten) is intended.
        keyword_question = Question(identifier=keyword_field[0],
                                    content=keyword_content,
                                    answer_spec=AnswerSpecification(keyword_fta),
                                    is_required=i <= 10)
        question_form.append(keyword_question)

        definition_content = QuestionContent()
        definition_content.append_field('Title', definition_field[1])
        definition_question = Question(identifier=definition_field[0],
                                       content=definition_content,
                                       answer_spec=AnswerSpecification(definition_fta),
                                       is_required=False)
        question_form.append(definition_question)

    # The note id travels with the HIT as its annotation.
    hit = connection.create_hit(questions=question_form, max_assignments=1,
                          title=title, description=KEYWORDS_HIT_DESCRIPTION,
                          keywords=KEYWORDS_HIT_KEYWORDS, duration=KEYWORDS_HIT_DURATION,
                          reward=KEYWORDS_HIT_REWARD, qualifications=KEYWORDS_HIT_QUALIFICATION,
                          annotation=str(note.id))[0]

    HIT.objects.create(HITId=hit.HITId, note=note, processed=False)
Exemple #2
0
def check_notes_mailbox():
    """Create one Mechanical Turk HIT per multipart email in the notes mailbox.

    Messages are fetched over POP3 (SSL) from pop.gmail.com. Non-multipart
    messages are skipped with a warning. For every other message the plain
    text body is collected, each attachment is uploaded to Filepicker, and a
    HIT is created that asks whether the email contains course notes, asks
    for the course, instructor, school and department names, and asks for a
    title and category for each uploaded attachment.

    Returns early without touching the mailbox when ``run_mturk`` returns a
    falsy host, or (after logging a warning) when any of the
    ``NOTES_MAILBOX_USERNAME``, ``NOTES_MAILBOX_PASSWORD`` or
    ``FILEPICKER_API_KEY`` environment variables is missing.
    """
    # NOTE(review): the name passed here is 'get_extract_keywords_results',
    # not this function's own name. It looks like a copy/paste slip; check
    # how run_mturk uses its argument before changing it.
    MTURK_HOST = run_mturk('get_extract_keywords_results')
    if not MTURK_HOST:
        return

    # A missing variable is the only failure expected here, so catch KeyError
    # specifically instead of hiding unrelated errors behind a bare except.
    try:
        MAILBOX_USER = os.environ['NOTES_MAILBOX_USERNAME']
        MAILBOX_PASSWORD = os.environ['NOTES_MAILBOX_PASSWORD']
        FILEPICKER_API_KEY = os.environ['FILEPICKER_API_KEY']
    except KeyError:
        logger.warning(
            'Could not find notes mailbox secrets, not running check_notes_mailbox'
        )
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID,
                                 settings.AWS_SECRET_ACCESS_KEY,
                                 host=MTURK_HOST)

    mailbox = poplib.POP3_SSL('pop.gmail.com', 995)
    try:
        mailbox.user(MAILBOX_USER)
        mailbox.pass_(MAILBOX_PASSWORD)
        message_count = len(mailbox.list()[1])

        # POP3 message numbers start at 1.
        for message_number in range(1, message_count + 1):
            # construct message object from raw message
            raw_message_string = '\n'.join(mailbox.retr(message_number)[1])
            message = email.message_from_string(raw_message_string)

            if not message.is_multipart():
                logger.warning('Got an email with no attachments')
                continue

            attachments = []
            message_body = ''

            for part in message.get_payload():
                disposition = part['Content-Disposition']

                # Look for the message's plain text body
                if part.get_content_type() == 'text/plain' and disposition is None:
                    message_body = part.get_payload()
                    continue

                # Look for attachments
                if not disposition or 'attachment;' not in disposition:
                    continue

                uploaded = _upload_email_attachment(part, FILEPICKER_API_KEY)
                if uploaded is not None:
                    attachments.append(uploaded)

            overview = Overview()
            overview.append(
                FormattedContent(
                    EMAIL_HIT_OVERVIEW_TEMPLATE.format(subject=message['Subject'],
                                                       body=message_body,
                                                       attachments='')))

            single_line_answer = FreeTextAnswer()
            single_line_answer.num_lines = 1

            question_form = QuestionForm()
            question_form.append(overview)

            _append_question(
                question_form, COURSE_SPAM_QID,
                'Does the email contain course notes (check attachments below)?',
                SelectionAnswer(style='dropdown',
                                selections=[('No', 'no'), ('Yes', 'yes')]),
                True)
            _append_question(question_form, COURSE_NAME_QID,
                             'Course Name', single_line_answer, True)
            _append_question(question_form, INSTRUCTOR_NAMES_QID,
                             'Instructor Name(s)', single_line_answer, False)
            _append_question(question_form, SCHOOL_NAME_QID,
                             'School Name', single_line_answer, True)
            _append_question(question_form, DEPARTMENT_NAME_QID,
                             'Department Name', single_line_answer, False)

            # One overview plus a title and a category question per uploaded
            # attachment; the attachment's position is appended to the
            # question identifiers.
            for position, (link, name) in enumerate(attachments):
                attachment_overview = Overview()
                attachment_overview.append(
                    FormattedContent(
                        EMAIL_HIT_ATTACHMENT_OVERVIEW_TEMPLATE.format(
                            link=link, name=name)))
                question_form.append(attachment_overview)

                _append_question(question_form,
                                 NOTE_TITLE_QID_TEMPLATE + str(position),
                                 'Note Title', single_line_answer, True)
                _append_question(
                    question_form,
                    NOTE_CATEGORY_QID_TEMPLATE + str(position),
                    'Note Category',
                    SelectionAnswer(style='dropdown',
                                    selections=NOTE_CATEGORIES_FOR_MTURK),
                    True)

            connection.create_hit(questions=question_form,
                                  max_assignments=1,
                                  title=EMAIL_HIT_TITLE,
                                  description=EMAIL_HIT_DESCRIPTION,
                                  keywords=EMAIL_HIT_KEYWORDS,
                                  duration=EMAIL_HIT_DURATION,
                                  reward=EMAIL_HIT_REWARD,
                                  qualifications=EMAIL_HIT_QUALIFICATION)
    finally:
        # Always sign off so the server-side session and the socket are
        # released, even when processing a message fails.
        mailbox.quit()


def _upload_email_attachment(part, filepicker_api_key):
    """Upload one attachment part to Filepicker; return (url, filename) or None."""
    filename_match = re.search(CONTENT_DISPOSITION_REGEX,
                               part['Content-Disposition'])
    if filename_match is None:
        # Skip just this attachment instead of crashing the whole run on a
        # header the regex does not understand.
        logger.warning('Could not find a filename for an attachment, skipping it')
        return None

    attachment_filename = filename_match.group('filename')
    attachment_mimetype = part.get_content_type()
    # decode=True undoes the part's Content-Transfer-Encoding (base64 in any
    # letter case, quoted-printable, ...) and returns other payloads as-is.
    attachment_data = part.get_payload(decode=True)

    # NOTE(review): the filename is placed in the query string unescaped.
    resp = requests.post('https://www.filepicker.io/api/store/S3?key={key}&policy={policy}&' \
                         'signature={signature}&mimetype={mimetype}&filename={filename}'
                         .format(key=filepicker_api_key, policy=FP_POLICY_READ_WRITE,
                                 signature=FP_SIGNATURE_READ_WRITE, mimetype=attachment_mimetype,
                                 filename=attachment_filename),
                         data=attachment_data)

    if resp.status_code != 200:
        logger.warning('Could not upload an attachment to filepicker')
        return None

    url = json.loads(resp.text)['url']
    # '&amp;' is kept from the original, presumably because the link ends up
    # inside the HIT's formatted content -- TODO confirm.
    url = url + '?policy={policy}&amp;signature={signature}'\
        .format(policy=FP_POLICY_READ, signature=FP_SIGNATURE_READ)
    return url, attachment_filename


def _append_question(question_form, identifier, title, answer, is_required):
    """Append a Question with the given title and answer spec to question_form."""
    content = QuestionContent()
    content.append_field('Title', title)
    question_form.append(Question(identifier=identifier,
                                  content=content,
                                  answer_spec=AnswerSpecification(answer),
                                  is_required=is_required))
Exemple #3
0
def check_notes_mailbox():
    """Create one Mechanical Turk HIT per multipart email in the notes mailbox.

    Messages are fetched over POP3 (SSL) from pop.gmail.com. Non-multipart
    messages are skipped with a warning. For every other message the plain
    text body is collected, each attachment is uploaded to Filepicker, and a
    HIT is created that asks whether the email contains course notes, asks
    for the course, instructor, school and department names, and asks for a
    title and category for each uploaded attachment.

    If any of the ``NOTES_MAILBOX_USERNAME``, ``NOTES_MAILBOX_PASSWORD``,
    ``FILEPICKER_API_KEY`` or ``MTURK_HOST`` environment variables is
    missing, a warning is logged and the mailbox is not touched.
    """
    # A missing variable is the only failure expected here, so catch KeyError
    # specifically instead of hiding unrelated errors behind a bare except.
    try:
        MAILBOX_USER = os.environ['NOTES_MAILBOX_USERNAME']
        MAILBOX_PASSWORD = os.environ['NOTES_MAILBOX_PASSWORD']
        FILEPICKER_API_KEY = os.environ['FILEPICKER_API_KEY']
        MTURK_HOST = os.environ['MTURK_HOST']
    except KeyError:
        logger.warning('Could not find notes mailbox secrets, not running check_notes_mailbox')
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY,
                                 host=MTURK_HOST)

    mailbox = poplib.POP3_SSL('pop.gmail.com', 995)
    try:
        mailbox.user(MAILBOX_USER)
        mailbox.pass_(MAILBOX_PASSWORD)
        message_count = len(mailbox.list()[1])

        # POP3 message numbers start at 1.
        for message_number in range(1, message_count + 1):
            # construct message object from raw message
            raw_message_string = '\n'.join(mailbox.retr(message_number)[1])
            message = email.message_from_string(raw_message_string)

            if not message.is_multipart():
                logger.warning('Got an email with no attachments')
                continue

            attachments = []
            message_body = ''

            for part in message.get_payload():
                disposition = part['Content-Disposition']

                # Look for the message's plain text body
                if part.get_content_type() == 'text/plain' and disposition is None:
                    message_body = part.get_payload()
                    continue

                # Look for attachments
                if not disposition or 'attachment;' not in disposition:
                    continue

                uploaded = _upload_email_attachment(part, FILEPICKER_API_KEY)
                if uploaded is not None:
                    attachments.append(uploaded)

            overview = Overview()
            overview.append(FormattedContent(
                EMAIL_HIT_OVERVIEW_TEMPLATE.format(subject=message['Subject'], body=message_body, attachments='')))

            single_line_answer = FreeTextAnswer()
            single_line_answer.num_lines = 1

            question_form = QuestionForm()
            question_form.append(overview)

            _append_question(question_form, COURSE_SPAM_QID,
                             'Does the email contain course notes (check attachments below)?',
                             SelectionAnswer(style='dropdown', selections=[('No', 'no'), ('Yes', 'yes')]),
                             True)
            _append_question(question_form, COURSE_NAME_QID, 'Course Name', single_line_answer, True)
            _append_question(question_form, INSTRUCTOR_NAMES_QID, 'Instructor Name(s)', single_line_answer, False)
            _append_question(question_form, SCHOOL_NAME_QID, 'School Name', single_line_answer, True)
            _append_question(question_form, DEPARTMENT_NAME_QID, 'Department Name', single_line_answer, False)

            # One overview plus a title and a category question per uploaded
            # attachment; the attachment's position is appended to the
            # question identifiers.
            for position, (link, name) in enumerate(attachments):
                attachment_overview = Overview()
                attachment_overview.append(FormattedContent(
                    EMAIL_HIT_ATTACHMENT_OVERVIEW_TEMPLATE.format(link=link, name=name)))
                question_form.append(attachment_overview)

                _append_question(question_form, NOTE_TITLE_QID_TEMPLATE + str(position),
                                 'Note Title', single_line_answer, True)
                _append_question(question_form, NOTE_CATEGORY_QID_TEMPLATE + str(position),
                                 'Note Category',
                                 SelectionAnswer(style='dropdown', selections=NOTE_CATEGORIES_FOR_MTURK),
                                 True)

            connection.create_hit(questions=question_form, max_assignments=1,
                                  title=EMAIL_HIT_TITLE, description=EMAIL_HIT_DESCRIPTION,
                                  keywords=EMAIL_HIT_KEYWORDS, duration=EMAIL_HIT_DURATION,
                                  reward=EMAIL_HIT_REWARD, qualifications=EMAIL_HIT_QUALIFICATION)
    finally:
        # Always sign off so the server-side session and the socket are
        # released, even when processing a message fails.
        mailbox.quit()


def _upload_email_attachment(part, filepicker_api_key):
    """Upload one attachment part to Filepicker; return (url, filename) or None."""
    filename_match = re.search(CONTENT_DISPOSITION_REGEX, part['Content-Disposition'])
    if filename_match is None:
        # Skip just this attachment instead of crashing the whole run on a
        # header the regex does not understand.
        logger.warning('Could not find a filename for an attachment, skipping it')
        return None

    attachment_filename = filename_match.group('filename')
    attachment_mimetype = part.get_content_type()
    # decode=True undoes the part's Content-Transfer-Encoding (base64 in any
    # letter case, quoted-printable, ...) and returns other payloads as-is.
    attachment_data = part.get_payload(decode=True)

    # NOTE(review): the filename is placed in the query string unescaped.
    resp = requests.post('https://www.filepicker.io/api/store/S3?key={key}&policy={policy}&' \
                         'signature={signature}&mimetype={mimetype}&filename={filename}'
                         .format(key=filepicker_api_key, policy=FP_POLICY_READ_WRITE,
                                 signature=FP_SIGNATURE_READ_WRITE, mimetype=attachment_mimetype,
                                 filename=attachment_filename),
                         data=attachment_data)

    if resp.status_code != 200:
        logger.warning('Could not upload an attachment to filepicker')
        return None

    url = json.loads(resp.text)['url']
    # '&amp;' is kept from the original, presumably because the link ends up
    # inside the HIT's formatted content -- TODO confirm.
    url = url + '?policy={policy}&amp;signature={signature}'\
        .format(policy=FP_POLICY_READ, signature=FP_SIGNATURE_READ)
    return url, attachment_filename


def _append_question(question_form, identifier, title, answer, is_required):
    """Append a Question with the given title and answer spec to question_form."""
    content = QuestionContent()
    content.append_field('Title', title)
    question_form.append(Question(identifier=identifier,
                                  content=content,
                                  answer_spec=AnswerSpecification(answer),
                                  is_required=is_required))
Exemple #4
0
def submit_extract_keywords_hit(note):
    """Post a keyword-extraction HIT for ``note`` to Mechanical Turk.

    The HIT shows an overview linking to the note, followed by alternating
    keyword and definition free-text questions taken pairwise from
    KEYWORDS_HIT_KEYWORD_FIELDS and KEYWORDS_HIT_DEFINITION_FIELDS. After
    the HIT is created, a ``KeywordExtractionHIT`` row is stored that links
    the HIT id to ``note`` with ``processed=False``.

    Returns early, doing nothing further, when ``run_mturk`` gives back a
    falsy host.
    """
    mturk_host = run_mturk('submit_extract_keywords_hit')
    if not mturk_host:
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID,
                                 settings.AWS_SECRET_ACCESS_KEY,
                                 host=mturk_host)

    # Use the course's own school when it has one, else the school of the
    # course's department.
    course = note.course
    school = course.school or course.department.school
    hit_title = KEYWORDS_HIT_TITLE_TEMPLATE.format(course=course.name,
                                                   school=school.name)

    intro_text = KEYWORDS_HIT_OVERVIEW_TEMPLATE.format(
        domain=Site.objects.get_current(),
        link=note.get_absolute_url())
    intro = Overview()
    intro.append(FormattedContent(intro_text))

    # One-line box for keywords, three-line box for definitions.
    one_line_answer = FreeTextAnswer()
    one_line_answer.num_lines = 1
    three_line_answer = FreeTextAnswer()
    three_line_answer.num_lines = 3

    form = QuestionForm()
    form.append(intro)

    # zip() pairs the two field lists up to the length of the shorter one.
    # Each entry is read as [0] = question identifier, [1] = displayed title.
    field_pairs = zip(KEYWORDS_HIT_KEYWORD_FIELDS,
                      KEYWORDS_HIT_DEFINITION_FIELDS)
    for position, (keyword_field, definition_field) in enumerate(field_pairs):
        keyword_title = QuestionContent()
        keyword_title.append_field('Title', keyword_field[1])
        # Keywords at positions 0 through 10 are mandatory.
        form.append(Question(identifier=keyword_field[0],
                             content=keyword_title,
                             answer_spec=AnswerSpecification(one_line_answer),
                             is_required=position <= 10))

        definition_title = QuestionContent()
        definition_title.append_field('Title', definition_field[1])
        # Definitions are always optional.
        form.append(Question(identifier=definition_field[0],
                             content=definition_title,
                             answer_spec=AnswerSpecification(three_line_answer),
                             is_required=False))

    # The note id is attached to the HIT as its annotation.
    created_hits = connection.create_hit(
        questions=form,
        max_assignments=1,
        title=hit_title,
        description=KEYWORDS_HIT_DESCRIPTION,
        keywords=KEYWORDS_HIT_KEYWORDS,
        duration=KEYWORDS_HIT_DURATION,
        reward=KEYWORDS_HIT_REWARD,
        qualifications=KEYWORDS_HIT_QUALIFICATION,
        annotation=str(note.id))
    hit = created_hits[0]

    KeywordExtractionHIT.objects.create(HITId=hit.HITId,
                                        note=note,
                                        processed=False)