Example #1
0
def submit_extract_keywords_hit(note):
    """Create a Mechanical Turk HIT that asks a worker to
    choose keywords and definitions from the given note.

    The Mechanical Turk host is read from the ``MTURK_HOST`` environment
    variable; when it is not set a warning is logged and no HIT is created.
    After the HIT has been created, a ``HIT`` row linking its id to ``note``
    is stored with ``processed=False``.

    :param note: the note to extract keywords from.  Its course name, school
        name (taken from the course, or from the course's department when
        the course has no school), id and absolute URL are used.
    :returns: ``None``.
    """
    # Only a missing variable means "Mechanical Turk is not configured";
    # a bare ``except`` would also hide unrelated errors.
    try:
        MTURK_HOST = os.environ['MTURK_HOST']
    except KeyError:
        logger.warning('Could not find Mechanical Turk secrets, not running submit_extract_keywords_hit')
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY,
                                 host=MTURK_HOST)

    # Fall back to the department's school when the course has none.
    course = note.course
    school = course.school if course.school else course.department.school
    title = KEYWORDS_HIT_TITLE_TEMPLATE.format(course=course.name, school=school.name)

    overview = Overview()
    overview.append(FormattedContent(KEYWORDS_HIT_OVERVIEW_TEMPLATE.format(
        domain=Site.objects.get_current(), link=note.get_absolute_url())))

    # One answer widget per kind, shared by every question of that kind.
    keyword_fta = FreeTextAnswer()
    keyword_fta.num_lines = 1

    definition_fta = FreeTextAnswer()
    definition_fta.num_lines = 3

    question_form = QuestionForm()
    question_form.append(overview)

    # zip() stops at the shorter list, so only complete keyword/definition
    # pairs are added.  Each field is indexed as [0] = question identifier,
    # [1] = title shown to the worker.
    field_pairs = zip(KEYWORDS_HIT_KEYWORD_FIELDS, KEYWORDS_HIT_DEFINITION_FIELDS)
    for i, (keyword_field, definition_field) in enumerate(field_pairs):
        keyword_content = QuestionContent()
        keyword_content.append_field('Title', keyword_field[1])
        keyword_question = Question(identifier=keyword_field[0],
                                    content=keyword_content,
                                    answer_spec=AnswerSpecification(keyword_fta),
                                    # keyword questions at indexes 0..10 are mandatory
                                    is_required=i <= 10)
        question_form.append(keyword_question)

        definition_content = QuestionContent()
        definition_content.append_field('Title', definition_field[1])
        definition_question = Question(identifier=definition_field[0],
                                       content=definition_content,
                                       answer_spec=AnswerSpecification(definition_fta),
                                       is_required=False)
        question_form.append(definition_question)

    # The note id travels in the annotation; only the first element of the
    # create_hit() result is used.
    hit = connection.create_hit(questions=question_form, max_assignments=1,
                                title=title, description=KEYWORDS_HIT_DESCRIPTION,
                                keywords=KEYWORDS_HIT_KEYWORDS, duration=KEYWORDS_HIT_DURATION,
                                reward=KEYWORDS_HIT_REWARD, qualifications=KEYWORDS_HIT_QUALIFICATION,
                                annotation=str(note.id))[0]

    HIT.objects.create(HITId=hit.HITId, note=note, processed=False)
def generate_question_forms(task_item, retval=DEFAULT_RETVAL):
    """
    Build one QuestionForm per page of ``retval``.

    ``retval`` is iterated as a sequence of pages; every page is iterated as
    a sequence of mappings providing the 'type', 'subtype' and 'dataurl'
    keys.  Each mapping becomes a free-text Question whose content is a
    'Binary' element pointing at DEFAULT_IMAGE_HOST + dataurl, and every
    form starts with the same overview from ``_gen_overview()``.

    Returns the list of QuestionForm instances, in page order.
    """
    # The task configuration is read here but not used below.
    _unused_config = task_item.config
    shared_overview = _gen_overview()

    forms = []
    for page in retval:
        form = QuestionForm()
        form.append(shared_overview)

        for segment in page:
            media = {
                'type': segment['type'],
                'subtype': segment['subtype'],
                'dataurl': '%s%s' % (DEFAULT_IMAGE_HOST, segment['dataurl']),
                # A fixed alt text is used rather than the segment's sentence.
                'alttext': 'no cheating!'
            }
            content = QuestionContent()
            content.append('Binary', media)

            answer_spec = AnswerSpecification(FreeTextAnswer())
            # Every question gets a fresh random identifier.
            form.append(Question(identifier=str(uuid.uuid4()),
                                 content=content,
                                 answer_spec=answer_spec))

        forms.append(form)

    return forms
Example #3
0
    def question_form(self):
        """Build a one-question form and, if a HIT type is set, publish it.

        The form holds a single required free-text question whose content is
        a JPEG image.  When ``self.hit_type_id`` is truthy the form is sent
        through ``self.connect.create_hit`` with a 14 day lifetime and 10
        assignments; the outcome (success or the request error's status,
        reason and body) is printed.  When it is falsy nothing is submitted.

        :returns: ``None``.
        """
        qc = QuestionContent()
        qc.append(
            Binary(
                'image', 'jpg',
                'http://www.miranchomeatmarket.com/images/T-%20bone%20steak.jpg',
                'steak'))
        q = Question(identifier="This is the first girl!",
                     content=qc,
                     answer_spec=AnswerSpecification(FreeTextAnswer()),
                     is_required=True,
                     display_name="This is display name")
        qf = QuestionForm()
        qf.append(q)

        if self.hit_type_id:
            try:
                create_hit_rs = self.connect.create_hit(
                    hit_type=self.hit_type_id,
                    question=qf,
                    lifetime=datetime.timedelta(days=14),
                    max_assignments=10,
                    annotation="This is a annotation")
            except MTurkRequestError as e:
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3, unlike the print statement.
                print("create hit type error:\n status: %s reason: %s\n body: %s" % (
                    e.status, e.reason, e.body))
            else:
                print("success!! key: %s" % create_hit_rs)
Example #4
0
    def get_question():
        """Return a free-text Question with a fixed title and text.

        The question identifier is a freshly generated random UUID string.
        """
        content = QuestionContent()
        content_fields = (
            ('Title', 'Boto no hit type question content'),
            ('Text', 'What is a boto no hit type?'),
        )
        for field_name, field_value in content_fields:
            content.append_field(field_name, field_value)

        # Any free text is accepted as the answer.
        free_text_spec = AnswerSpecification(FreeTextAnswer())
        return Question(identifier=str(uuid.uuid4()),
                        content=content,
                        answer_spec=free_text_spec)
Example #5
0
File: amt.py Project: zhydhkcws/CDB
def make_free_text_question_form(questions):
    """Build a QuestionForm with one free-text question per column.

    Every item of ``questions`` is indexed by 'id', 'content' and 'columns'.
    For each column a required question is added whose identifier is
    ``str(id) + free_sep + column``, whose text is the column itself and
    whose answer must be between 1 and 100 characters long.  An Overview
    titled with the item's 'content' is added in front of the item's first
    column.

    Raises ValueError when ``questions`` is empty (or otherwise falsy).
    Returns the populated QuestionForm.
    """
    if not questions:
        raise ValueError('Questions cannot be empty!')

    form = QuestionForm()
    for entry in questions:
        entry_id = entry['id']
        for column in entry['columns']:
            # The comparison is by value, so any later column equal to the
            # first one gets an overview in front of it as well.
            if column == entry['columns'][0]:
                form.append(Overview([SimpleField('Title', entry['content'])]))

            identifier = str(entry_id) + free_sep + column
            content = QuestionContent([SimpleField('Text', column)])
            length_limit = Constraints(
                [LengthConstraint(min_length=1, max_length=100)])
            free_text = FreeTextAnswer(constraints=length_limit)
            form.append(
                Question(identifier, content, AnswerSpecification(free_text), True))

    return form
Example #6
0
    def make_question_form_HIT(self,
                               audio_clip_urls,
                               hit_title,
                               question_title,
                               description,
                               keywords,
                               duration=DEFAULT_DURATION,
                               reward=DEFAULT_REWARD):
        """Build a transcription QuestionForm and submit it as a HIT.

        One free-text question is added per entry of ``audio_clip_urls``;
        its content is ``self.transcription_question`` with every
        ``self.audio_url_tag`` replaced by the clip URL.

        :param audio_clip_urls: iterable of audio clip URLs.
        :param hit_title: title used for both the overview and the HIT.
        :param question_title: title shown above every question.
        :param description: HIT description passed to ``create_hit``.
        :param keywords: HIT keywords passed to ``create_hit``.
        :param duration: HIT duration passed to ``create_hit``.
        :param reward: HIT reward passed to ``create_hit``.
        :returns: ``(question_form, response)``.  ``response`` is ``None``
            when ``create_hit`` raised an ``MTurkRequestError`` whose reason
            is "OK" (that error is deliberately ignored).
        :raises MTurkRequestError: for any request error whose reason is not
            "OK".
        """
        overview = Overview()
        overview.append_field("Title", hit_title)
        question_form = QuestionForm()
        question_form.append(overview)
        for ac in audio_clip_urls:
            audio_html = self.transcription_question.replace(
                self.audio_url_tag, ac)
            qc = QuestionContent()
            qc.append_field("Title", question_title)
            qc.append(FormattedContent(audio_html))
            fta = FreeTextAnswer()
            # NOTE(review): every question shares the identifier
            # "transcription" -- confirm this is accepted when more than one
            # clip is passed.
            q = Question(identifier="transcription",
                         content=qc,
                         answer_spec=AnswerSpecification(fta))
            question_form.append(q)

        # Pre-bind the result: when the "OK" error below is swallowed the
        # assignment never happens, and the return statement used to fail
        # with UnboundLocalError.
        response = None
        try:
            response = self.conn.create_hit(questions=question_form,
                                            max_assignments=1,
                                            title=hit_title,
                                            description=description,
                                            keywords=keywords,
                                            duration=duration,
                                            reward=reward)
        except MTurkRequestError as e:
            if e.reason != "OK":
                raise

        return question_form, response
Example #7
0
# Overview shown at the top of the HIT: a title followed by the image
# (PIN_IMAGE_URL) with its title (PIN_IMAGE_TITLE) on the next line.

overview = Overview()
overview.append_field('Title',
                      'Find three Macys.com Product Web IDs That Match')
overview.append(
    FormattedContent('<img src="' + PIN_IMAGE_URL + '" alt="Pintrest Image" />'
                     '<br />' + PIN_IMAGE_TITLE))

# First WebID question: a single-line free-text answer.

qc1 = QuestionContent()
qc1.append_field('Title', 'First WebID Code')

fta1 = FreeTextAnswer(num_lines=1)

q1 = Question(identifier="FirstWebCode",
              content=qc1,
              answer_spec=AnswerSpecification(fta1))

# Second WebID question: same shape as the first, different identifier.

qc2 = QuestionContent()
qc2.append_field('Title', 'Second WebID Code')

fta2 = FreeTextAnswer(num_lines=1)

q2 = Question(identifier="SecondWebCode",
              content=qc2,
              answer_spec=AnswerSpecification(fta2))
Example #8
0
def PublishTasks(hitNum, maxAssignments):
    """Publish ``hitNum`` identical relation-identification HITs.

    Every HIT shows an overview (a link to the external exercise plus
    instructions) and one free-text question asking for the confirm code the
    worker receives from the exercise site.

    :param hitNum: number of HITs to create.
    :param maxAssignments: ``max_assignments`` value given to every HIT
        (how many replicas each HIT has).
    :returns: ``None``.
    """
    # sandbox in which to simulate: mechanicalturk.sandbox.amazonaws.com
    # real environment: mechanicalturk.amazonaws.com
    mtc = MTurkConnection(host='mechanicalturk.amazonaws.com')

    #---------------  HIT METADATA -------------------

    # jbragg: Modified maximum reward description (approximately $5).
    title = 'Identify the relation between two entities in English sentences'
    description = 'You will be given English sentences in which your task is to identify the relation between two designated entities. Your reward will depend on how many questions you have answered. The maximum reward you can earn is approximately $5.'
    keywords = 'English sentences, relation identification'

    # (label, selection identifier) pairs.  Every identifier is distinct so
    # that "Good" and "Very Good" cannot be confused in the results.
    ratings = [('Very Bad', '-2'), ('Bad', '-1'), ('Not bad', '0'),
               ('Good', '1'), ('Very Good', '2')]

    #---------------  BUILD OVERVIEW -------------------

    overview = Overview()
    overview_title = 'Exercise link (please copy the link and paste it in your browser if it cannot be opened directly.)'
    link = '<a target="_blank"' ' href="http://128.208.3.167:3000/mturk">' ' http://128.208.3.167:3000/mturk</a>'
    # jbragg: Commented out long-term bonus (inside the HTML comment below).
    instructions = '<p>Instructions:</p><ul><li>You will be presented with sentences that have a person and a location highlighted.</li><li>Your task is to determine which of the 5 designated relations are expressed between the person and location.</li><li>You&#39;ll get paid $0.50 after each successful set of 20 questions<!-- -- plus a bonus of $2.00 after every 10 batches (equal to 200 questions)-->.</li><li>We know the correct answers to some of these sentence questions, and you can stay if you get these questions right.</li><li>You can start by going to the external link above now. After you finish all the questions, you will be provided with a confirm code, used for authentication and determining the appropriate amount of money as the payment.</li><li>In very rare cases where the website crashes, you could click backward and then forward on your browser to reload the question. It won\'t affect the payment because all the questions you have answered are recorded, on which the amount of payment is based. So please don\'t worry about that.</li></ul>'
    overview_content = link + instructions
    overview.append_field('Title', overview_title)
    overview.append(FormattedContent(overview_content))

    #---------------  BUILD QUESTION 1 -------------------

    # The rating question is built but currently NOT added to the form
    # (see the disabled append further down).
    qc1 = QuestionContent()
    qc1.append_field('Title', 'How looks the design ?')

    fta1 = SelectionAnswer(min=1,
                           max=1,
                           style='dropdown',
                           selections=ratings,
                           type='text',
                           other=False)

    q1 = Question(identifier='design',
                  content=qc1,
                  answer_spec=AnswerSpecification(fta1),
                  is_required=True)

    #---------------  BUILD QUESTION 2 -------------------

    qc2 = QuestionContent()
    qc2.append_field(
        'Title',
        'Confirm code \n1. The code will be provided to you as you finish from the exercise link. \n2. The code will be verified before paying. \n3. By the end of every 20 questions (as a batch), You can choose to finish and get a confirm code, or continue.'
    )

    fta2 = FreeTextAnswer()

    q2 = Question(identifier="Confirm_code",
                  content=qc2,
                  answer_spec=AnswerSpecification(fta2))

    #--------------- BUILD THE QUESTION FORM -------------------

    question_form = QuestionForm()
    question_form.append(overview)
    # question_form.append(q1)
    question_form.append(q2)

    #--------------- CREATE HITs -------------------

    for _ in range(hitNum):
        # max_assignments: how many replicas this HIT has
        mtc.create_hit(questions=question_form,
                       max_assignments=maxAssignments,
                       title=title,
                       description=description,
                       keywords=keywords,
                       duration=60 * 60 * 10,
                       reward=0.50)
Example #9
0
def check_notes_mailbox():
    """Create one Mechanical Turk HIT for every multipart email in the notes mailbox.

    Messages are fetched over POP3 (SSL) from pop.gmail.com.  For each
    multipart message the plain-text body goes into the HIT overview, every
    attachment is uploaded to Filepicker, and the HIT asks the worker to
    classify the email and to give a title and category for each uploaded
    attachment.  Messages that are not multipart are skipped with a warning,
    as are attachments whose filename cannot be found or whose upload fails.

    The mailbox credentials, the Filepicker API key and the Mechanical Turk
    host are read from the environment; when any of them is missing a
    warning is logged and nothing is done.

    :returns: ``None``.
    """
    # Only a missing variable means "not configured"; a bare ``except``
    # would also hide unrelated errors.
    try:
        MAILBOX_USER = os.environ['NOTES_MAILBOX_USERNAME']
        MAILBOX_PASSWORD = os.environ['NOTES_MAILBOX_PASSWORD']
        FILEPICKER_API_KEY = os.environ['FILEPICKER_API_KEY']
        MTURK_HOST = os.environ['MTURK_HOST']
    except KeyError:
        logger.warning('Could not find notes mailbox secrets, not running check_notes_mailbox')
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY,
                                 host=MTURK_HOST)

    mailbox = poplib.POP3_SSL('pop.gmail.com', 995)
    try:
        mailbox.user(MAILBOX_USER)
        mailbox.pass_(MAILBOX_PASSWORD)
        numMessages = len(mailbox.list()[1])
        # POP3 numbers messages starting at 1.
        for message_number in range(1, numMessages + 1):
            # construct message object from raw message
            raw_message_string = '\n'.join(mailbox.retr(message_number)[1])
            message = email.message_from_string(raw_message_string)

            if not message.is_multipart():
                logger.warning('Got an email with no attachments')
                continue

            attachments = []
            message_body = ''

            for part in message.get_payload():
                disposition = part['Content-Disposition']

                # Look for the message's plain text body
                if part.get_content_type() == 'text/plain' and disposition is None:
                    message_body = part.get_payload()

                # Look for attachments
                elif disposition and 'attachment;' in disposition:
                    attachment_mimetype = part.get_content_type()
                    filename_match = re.search(CONTENT_DISPOSITION_REGEX, disposition)
                    if filename_match is None:
                        # One malformed header must not abort the whole run.
                        logger.warning('Got an attachment without a filename, skipping it')
                        continue
                    attachment_filename = filename_match.group('filename')

                    if part['Content-Transfer-Encoding'] == 'base64':
                        attachment_data = base64.decodestring(part.get_payload())
                    else:
                        attachment_data = part.get_payload()

                    # Upload attachment to filepicker
                    resp = requests.post('https://www.filepicker.io/api/store/S3?key={key}&policy={policy}&' \
                                         'signature={signature}&mimetype={mimetype}&filename={filename}'
                                         .format(key=FILEPICKER_API_KEY, policy=FP_POLICY_READ_WRITE,
                                                 signature=FP_SIGNATURE_READ_WRITE, mimetype=attachment_mimetype,
                                                 filename=attachment_filename),
                                         data=attachment_data)

                    if resp.status_code == 200:
                        url = json.loads(resp.text)['url']
                        # The read-only policy is appended with '&amp;'; the
                        # link is later formatted into a FormattedContent
                        # overview (presumably as markup -- confirm against
                        # the template).
                        url = url + '?policy={policy}&amp;signature={signature}'\
                            .format(policy=FP_POLICY_READ, signature=FP_SIGNATURE_READ)
                        attachments.append((url, attachment_filename))
                    else:
                        logger.warning('Could not upload an attachment to filepicker')

            message_subject = message['Subject']

            overview = Overview()
            overview.append(FormattedContent(
                EMAIL_HIT_OVERVIEW_TEMPLATE.format(subject=message_subject, body=message_body, attachments='')))

            # One single-line answer widget shared by every free-text question.
            single_line_answer = FreeTextAnswer()
            single_line_answer.num_lines = 1

            question_form = QuestionForm()
            question_form.append(overview)

            course_spam_content = QuestionContent()
            course_spam_content.append_field('Title', 'Does the email contain course notes (check attachments below)?')
            answer = SelectionAnswer(style='dropdown', selections=[('No', 'no'), ('Yes', 'yes')])
            course_spam = Question(identifier=COURSE_SPAM_QID,
                                   content=course_spam_content,
                                   answer_spec=AnswerSpecification(answer),
                                   is_required=True)
            question_form.append(course_spam)

            # (identifier, title, required) for the per-email free-text
            # questions, in the order they appear on the form.
            single_line_questions = (
                (COURSE_NAME_QID, 'Course Name', True),
                (INSTRUCTOR_NAMES_QID, 'Instructor Name(s)', False),
                (SCHOOL_NAME_QID, 'School Name', True),
                (DEPARTMENT_NAME_QID, 'Department Name', False),
            )
            for question_id, question_title, required in single_line_questions:
                content = QuestionContent()
                content.append_field('Title', question_title)
                question_form.append(Question(identifier=question_id,
                                              content=content,
                                              answer_spec=AnswerSpecification(single_line_answer),
                                              is_required=required))

            # One overview plus title/category questions per uploaded
            # attachment; the attachment's position is the identifier suffix.
            for attachment_index, (attachment_link, attachment_name) in enumerate(attachments):
                overview = Overview()
                overview.append(FormattedContent(
                    EMAIL_HIT_ATTACHMENT_OVERVIEW_TEMPLATE.format(link=attachment_link, name=attachment_name)))

                question_form.append(overview)

                note_title_content = QuestionContent()
                note_title_content.append_field('Title', 'Note Title')
                note_title = Question(identifier=NOTE_TITLE_QID_TEMPLATE + str(attachment_index),
                                      content=note_title_content,
                                      answer_spec=AnswerSpecification(single_line_answer),
                                      is_required=True)
                question_form.append(note_title)

                note_category_content = QuestionContent()
                note_category_content.append_field('Title', 'Note Category')
                answer = SelectionAnswer(style='dropdown', selections=NOTE_CATEGORIES_FOR_MTURK)
                note_category = Question(identifier=NOTE_CATEGORY_QID_TEMPLATE + str(attachment_index),
                                         content=note_category_content,
                                         answer_spec=AnswerSpecification(answer),
                                         is_required=True)
                question_form.append(note_category)

            connection.create_hit(questions=question_form, max_assignments=1,
                                  title=EMAIL_HIT_TITLE, description=EMAIL_HIT_DESCRIPTION,
                                  keywords=EMAIL_HIT_KEYWORDS, duration=EMAIL_HIT_DURATION,
                                  reward=EMAIL_HIT_REWARD, qualifications=EMAIL_HIT_QUALIFICATION)
    finally:
        # Always end the POP3 session so the connection is not leaked.
        mailbox.quit()
Example #10
0
                       style='dropdown',
                       selections=ratings,
                       type='text',
                       other=False)

# Required question 'rating'; its content (qc1) and answer widget (fta1) are
# defined earlier in the script.
q1 = Question(identifier='rating',
              content=qc1,
              answer_spec=AnswerSpecification(fta1),
              is_required=True)

# Question 2: free-text comments; is_required is not passed for this one.

qc2 = QuestionContent()
qc2.append_field('Title', 'Comments about the HIT (Optional)')

fta2 = FreeTextAnswer()

q2 = Question(identifier="comments",
              content=qc2,
              answer_spec=AnswerSpecification(fta2))

# Assemble the form: the overview first, then the two questions in order.

question_form = QuestionForm()
question_form.append(overview)
question_form.append(q1)
question_form.append(q2)

#--------------- CREATE THE HIT -------------------

mtc.create_hit(questions=question_form,
Example #11
0
def new_rate_hit(PIN_IMAGE_URL, PIN_IMAGE_TITLE, MACYS_IMAGE_URL,
                 MACYS_IMAGE_TITLE):
    """Create a HIT asking one worker to rate how well two images match.

    The overview shows both images side by side with their titles below.
    The worker must pick a rating from a dropdown ('Very Bad' = 1 up to
    'Very Good' = 5) and may leave a free-text comment.

    :param PIN_IMAGE_URL: URL of the first image.
    :param PIN_IMAGE_TITLE: title shown under the first image.
    :param MACYS_IMAGE_URL: URL of the second image.
    :param MACYS_IMAGE_TITLE: title shown under the second image.
    :returns: ``None``.

    NOTE(review): the four arguments are concatenated into the markup
    without escaping -- confirm callers pass markup-safe values.
    """
    mtc = MTurkConnection(aws_access_key_id=AWS_ACCESS_KEY_ID,
                          aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                          host=HOST)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if debug:
        print(mtc.get_account_balance())

    title = 'Match these Pictures to Macy\'s Products'
    description = 'Look at this photo and match it to Macy\'s products'
    keywords = 'clothing, rating, opinions, easy, quick, macys'

    # (label, selection identifier) pairs for the rating dropdown.
    ratings = [('Very Bad', '1'), ('Bad', '2'), ('OK', '3'), ('Good', '4'),
               ('Very Good', '5')]

    # Overview: a two-column table, images in the first row and their
    # titles in the second.  (The opening <table> tag used to be followed by
    # a stray '>'.)

    overview = Overview()
    overview.append_field('Title', 'Rank how these two images match.')
    overview.append(
        FormattedContent('<table border="1"><tr><td width="50%"><img src="' +
                         PIN_IMAGE_URL + '" alt="Pintrest Image" /></td>'
                         '<td width="50%"><img src="' + MACYS_IMAGE_URL +
                         '" alt="Macys Image" /></td></tr><tr>'
                         '<td width="50%">' + PIN_IMAGE_TITLE +
                         '</td><td width="50%">' + MACYS_IMAGE_TITLE +
                         '</td></tr></table>'))

    # Question 1: required rating, exactly one selection.

    qc1 = QuestionContent()
    qc1.append_field('Title', 'Rank the match between these two')

    fta1 = SelectionAnswer(min=1,
                           max=1,
                           style='dropdown',
                           selections=ratings,
                           type='text',
                           other=False)

    q1 = Question(identifier='rating',
                  content=qc1,
                  answer_spec=AnswerSpecification(fta1),
                  is_required=True)

    # Question 2: free-text comments; is_required is not passed.

    qc2 = QuestionContent()
    qc2.append_field('Title', 'Comments about the HIT (Optional)')

    fta2 = FreeTextAnswer()

    q2 = Question(identifier="comments",
                  content=qc2,
                  answer_spec=AnswerSpecification(fta2))

    # Question form: overview first, then the questions in order.

    question_form = QuestionForm()
    question_form.append(overview)
    question_form.append(q1)
    question_form.append(q2)

    #--------------- CREATE THE HIT -------------------

    mtc.create_hit(questions=question_form,
                   max_assignments=1,
                   title=title,
                   description=description,
                   keywords=keywords,
                   duration=60 * 5,
                   reward=0.05)
Example #12
0
def new_sugg_hit(PIN_IMAGE_URL, PIN_IMAGE_TITLE):
    """Create a HIT asking one worker for three matching Macys.com Web IDs.

    The overview shows the image with its title underneath; the form then
    asks for a first, second and third WebID code, each as a single-line
    free-text answer.

    :param PIN_IMAGE_URL: URL of the image to match.
    :param PIN_IMAGE_TITLE: title shown under the image.
    :returns: ``None``.

    NOTE(review): both arguments are concatenated into the markup without
    escaping -- confirm callers pass markup-safe values.
    """
    mtc = MTurkConnection(aws_access_key_id=AWS_ACCESS_KEY_ID,
                          aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                          host=HOST)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if debug:
        print(mtc.get_account_balance())

    title = 'Match these Pictures to Macy\'s Products'
    description = 'Look at this photo and match it to Macy\'s products'
    keywords = 'clothing, rating, opinions, easy, quick, macys'

    # Overview: the image followed by its title on the next line.

    overview = Overview()
    overview.append_field('Title',
                          'Find three Macys.com Product Web IDs That Match')
    overview.append(
        FormattedContent('<img src="' + PIN_IMAGE_URL +
                         '" alt="Pintrest Image" />'
                         '<br />' + PIN_IMAGE_TITLE))

    question_form = QuestionForm()
    question_form.append(overview)

    # The three WebID questions differ only in identifier and title, so
    # they are built from one table instead of three copies of the code.
    webid_questions = (
        ("FirstWebCode", 'First WebID Code'),
        ("SecondWebCode", 'Second WebID Code'),
        ("ThirdWebCode", 'Third WebID Code'),
    )
    for identifier, question_title in webid_questions:
        content = QuestionContent()
        content.append_field('Title', question_title)
        question_form.append(
            Question(identifier=identifier,
                     content=content,
                     answer_spec=AnswerSpecification(
                         FreeTextAnswer(num_lines=1))))

    #--------------- CREATE THE HIT -------------------

    mtc.create_hit(questions=question_form,
                   max_assignments=1,
                   title=title,
                   description=description,
                   keywords=keywords,
                   duration=60 * 5,
                   reward=0.05)
Example #13
0
#
# Questions can each be designed uniquely. For this demo I only build enough
# of a QuestionContent to host an input box by using a FreeTextAnswer. There
# are multiple options for how a QuestionContent is constructed.
#
# I recommend reading: http://docs.amazonwebservices.com/AWSMechanicalTurkRequester/2007-06-21/ApiReference_QuestionFormDataStructureArticle.html
#
# Quickly, the available content types are: Title, Text, List, Binary,
#                                           Application, EmbeddedBinary,
#                                           FormattedContent

# Build five free-text questions labelled "Fav #0:" .. "Fav #4:", each with
# a random UUID as its identifier.  range() is used instead of xrange() so
# the loop runs on both Python 2 and Python 3.
question_list = []
for i in range(5):
    qc = QuestionContent()
    qc.append_field('Text', "Fav #%s:" % i)
    fta = FreeTextAnswer()
    ansp = AnswerSpecification(fta)
    q = Question(identifier=str(uuid.uuid4()), content=qc, answer_spec=ansp)
    question_list.append(q)

# QuestionForm design
#
# A QuestionForm is a container that defines the shape of the HIT task. In
# this example, we build one like:
#
#     <QuestionForm>
#        <Overview></Overview>
#        <Question></Question>
#        ...,
#        <Question></Question>
#     </QuestionForm>
Example #14
0
def check_notes_mailbox():
    """Create one Mechanical Turk HIT for every multipart email in the notes mailbox.

    ``run_mturk`` supplies the Mechanical Turk host; when it returns a falsy
    value nothing is done.  Messages are then fetched over POP3 (SSL) from
    pop.gmail.com.  For each multipart message the plain-text body goes into
    the HIT overview, every attachment is uploaded to Filepicker, and the
    HIT asks the worker to classify the email and to give a title and
    category for each uploaded attachment.  Messages that are not multipart
    are skipped with a warning, as are attachments whose filename cannot be
    found or whose upload fails.

    The mailbox credentials and the Filepicker API key are read from the
    environment; when any of them is missing a warning is logged and nothing
    is done.

    :returns: ``None``.
    """
    # NOTE(review): the name passed here is 'get_extract_keywords_results',
    # not this function's own name -- confirm this is intended.
    MTURK_HOST = run_mturk('get_extract_keywords_results')
    if not MTURK_HOST:
        return

    # Only a missing variable means "not configured"; a bare ``except``
    # would also hide unrelated errors.
    try:
        MAILBOX_USER = os.environ['NOTES_MAILBOX_USERNAME']
        MAILBOX_PASSWORD = os.environ['NOTES_MAILBOX_PASSWORD']
        FILEPICKER_API_KEY = os.environ['FILEPICKER_API_KEY']
    except KeyError:
        logger.warning(
            'Could not find notes mailbox secrets, not running check_notes_mailbox'
        )
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID,
                                 settings.AWS_SECRET_ACCESS_KEY,
                                 host=MTURK_HOST)

    mailbox = poplib.POP3_SSL('pop.gmail.com', 995)
    try:
        mailbox.user(MAILBOX_USER)
        mailbox.pass_(MAILBOX_PASSWORD)
        numMessages = len(mailbox.list()[1])
        # POP3 numbers messages starting at 1.
        for message_number in range(1, numMessages + 1):
            # construct message object from raw message
            raw_message_string = '\n'.join(mailbox.retr(message_number)[1])
            message = email.message_from_string(raw_message_string)

            if not message.is_multipart():
                logger.warning('Got an email with no attachments')
                continue

            attachments = []
            message_body = ''

            for part in message.get_payload():
                disposition = part['Content-Disposition']

                # Look for the message's plain text body
                if (part.get_content_type() == 'text/plain'
                        and disposition is None):
                    message_body = part.get_payload()

                # Look for attachments
                elif disposition and 'attachment;' in disposition:
                    attachment_mimetype = part.get_content_type()
                    filename_match = re.search(CONTENT_DISPOSITION_REGEX,
                                               disposition)
                    if filename_match is None:
                        # One malformed header must not abort the whole run.
                        logger.warning(
                            'Got an attachment without a filename, skipping it')
                        continue
                    attachment_filename = filename_match.group('filename')

                    if part['Content-Transfer-Encoding'] == 'base64':
                        attachment_data = base64.decodestring(
                            part.get_payload())
                    else:
                        attachment_data = part.get_payload()

                    # Upload attachment to filepicker
                    resp = requests.post('https://www.filepicker.io/api/store/S3?key={key}&policy={policy}&' \
                                         'signature={signature}&mimetype={mimetype}&filename={filename}'
                                         .format(key=FILEPICKER_API_KEY, policy=FP_POLICY_READ_WRITE,
                                                 signature=FP_SIGNATURE_READ_WRITE, mimetype=attachment_mimetype,
                                                 filename=attachment_filename),
                                         data=attachment_data)

                    if resp.status_code == 200:
                        url = json.loads(resp.text)['url']
                        # The read-only policy is appended with '&amp;'; the
                        # link is later formatted into a FormattedContent
                        # overview (presumably as markup -- confirm against
                        # the template).
                        url = url + '?policy={policy}&amp;signature={signature}'\
                            .format(policy=FP_POLICY_READ, signature=FP_SIGNATURE_READ)
                        attachments.append((url, attachment_filename))
                    else:
                        logger.warning(
                            'Could not upload an attachment to filepicker')

            message_subject = message['Subject']

            overview = Overview()
            overview.append(
                FormattedContent(
                    EMAIL_HIT_OVERVIEW_TEMPLATE.format(subject=message_subject,
                                                       body=message_body,
                                                       attachments='')))

            # One single-line answer widget shared by every free-text question.
            single_line_answer = FreeTextAnswer()
            single_line_answer.num_lines = 1

            question_form = QuestionForm()
            question_form.append(overview)

            course_spam_content = QuestionContent()
            course_spam_content.append_field(
                'Title',
                'Does the email contain course notes (check attachments below)?')
            answer = SelectionAnswer(style='dropdown',
                                     selections=[('No', 'no'), ('Yes', 'yes')])
            course_spam = Question(identifier=COURSE_SPAM_QID,
                                   content=course_spam_content,
                                   answer_spec=AnswerSpecification(answer),
                                   is_required=True)
            question_form.append(course_spam)

            # (identifier, title, required) for the per-email free-text
            # questions, in the order they appear on the form.
            single_line_questions = (
                (COURSE_NAME_QID, 'Course Name', True),
                (INSTRUCTOR_NAMES_QID, 'Instructor Name(s)', False),
                (SCHOOL_NAME_QID, 'School Name', True),
                (DEPARTMENT_NAME_QID, 'Department Name', False),
            )
            for question_id, question_title, required in single_line_questions:
                content = QuestionContent()
                content.append_field('Title', question_title)
                question_form.append(
                    Question(
                        identifier=question_id,
                        content=content,
                        answer_spec=AnswerSpecification(single_line_answer),
                        is_required=required))

            # One overview plus title/category questions per uploaded
            # attachment; the attachment's position is the identifier suffix.
            for attachment_index, (attachment_link,
                                   attachment_name) in enumerate(attachments):
                overview = Overview()
                overview.append(
                    FormattedContent(
                        EMAIL_HIT_ATTACHMENT_OVERVIEW_TEMPLATE.format(
                            link=attachment_link, name=attachment_name)))

                question_form.append(overview)

                note_title_content = QuestionContent()
                note_title_content.append_field('Title', 'Note Title')
                note_title = Question(
                    identifier=NOTE_TITLE_QID_TEMPLATE + str(attachment_index),
                    content=note_title_content,
                    answer_spec=AnswerSpecification(single_line_answer),
                    is_required=True)
                question_form.append(note_title)

                note_category_content = QuestionContent()
                note_category_content.append_field('Title', 'Note Category')
                answer = SelectionAnswer(style='dropdown',
                                         selections=NOTE_CATEGORIES_FOR_MTURK)
                note_category = Question(
                    identifier=NOTE_CATEGORY_QID_TEMPLATE +
                    str(attachment_index),
                    content=note_category_content,
                    answer_spec=AnswerSpecification(answer),
                    is_required=True)
                question_form.append(note_category)

            connection.create_hit(questions=question_form,
                                  max_assignments=1,
                                  title=EMAIL_HIT_TITLE,
                                  description=EMAIL_HIT_DESCRIPTION,
                                  keywords=EMAIL_HIT_KEYWORDS,
                                  duration=EMAIL_HIT_DURATION,
                                  reward=EMAIL_HIT_REWARD,
                                  qualifications=EMAIL_HIT_QUALIFICATION)
    finally:
        # Always end the POP3 session so the connection is not leaked.
        mailbox.quit()
Example #15
0
def submit_extract_keywords_hit(note):
    """Submit a Mechanical Turk HIT for keyword extraction on ``note``.

    The HIT shows an overview linking to the note, followed by paired
    keyword / definition free-text questions, one pair per entry shared by
    KEYWORDS_HIT_KEYWORD_FIELDS and KEYWORDS_HIT_DEFINITION_FIELDS.  Once
    the HIT is created, a KeywordExtractionHIT row is stored that ties the
    HIT id to the note (``processed=False``).

    Returns None.  Nothing is submitted when ``run_mturk`` returns a falsy
    value.
    """

    mturk_host = run_mturk('submit_extract_keywords_hit')
    if not mturk_host:
        return

    connection = MTurkConnection(settings.AWS_ACCESS_KEY_ID,
                                 settings.AWS_SECRET_ACCESS_KEY,
                                 host=mturk_host)

    # Use the course's own school when set, otherwise the school of the
    # course's department.
    course = note.course
    school = course.school or course.department.school
    title = KEYWORDS_HIT_TITLE_TEMPLATE.format(course=course.name,
                                               school=school.name)

    overview_markup = KEYWORDS_HIT_OVERVIEW_TEMPLATE.format(
        domain=Site.objects.get_current(),
        link=note.get_absolute_url())
    overview = Overview()
    overview.append(FormattedContent(overview_markup))

    # One-line answer box for keywords, three lines for definitions.  The
    # same answer object is reused by every question of that kind.
    keyword_answer = FreeTextAnswer()
    keyword_answer.num_lines = 1

    definition_answer = FreeTextAnswer()
    definition_answer.num_lines = 3

    def text_question(field, answer, required):
        # ``field`` is indexed as (question identifier, displayed title).
        content = QuestionContent()
        content.append_field('Title', field[1])
        return Question(identifier=field[0],
                        content=content,
                        answer_spec=AnswerSpecification(answer),
                        is_required=required)

    question_form = QuestionForm()
    question_form.append(overview)

    # zip() stops at the shorter of the two field lists, so every keyword
    # question is always followed by its matching definition question.
    field_pairs = zip(KEYWORDS_HIT_KEYWORD_FIELDS,
                      KEYWORDS_HIT_DEFINITION_FIELDS)
    for position, (keyword_field, definition_field) in enumerate(field_pairs):
        # Keywords at positions 0 through 10 are mandatory; definitions
        # are always optional.
        question_form.append(
            text_question(keyword_field, keyword_answer, position <= 10))
        question_form.append(
            text_question(definition_field, definition_answer, False))

    # create_hit() returns a sequence; the first element is the new HIT.
    # The note id is passed as the annotation.
    created = connection.create_hit(
        questions=question_form,
        max_assignments=1,
        title=title,
        description=KEYWORDS_HIT_DESCRIPTION,
        keywords=KEYWORDS_HIT_KEYWORDS,
        duration=KEYWORDS_HIT_DURATION,
        reward=KEYWORDS_HIT_REWARD,
        qualifications=KEYWORDS_HIT_QUALIFICATION,
        annotation=str(note.id))
    hit = created[0]

    KeywordExtractionHIT.objects.create(HITId=hit.HITId,
                                        note=note,
                                        processed=False)
Example #16
0
def demo_create_favorite_color_hit():
    """Create a demo HIT asking the Worker for their favorite color.

    The form holds an overview, one required single-line question
    ('favorite_color') and one free-text question ('comments').  After the
    HIT is created, a preview link for its HIT group is printed.

    Returns the first element of the result given back by ``create_hit``.
    """

    hit_title = 'Tell me your favorite color'
    hit_description = ('This is a HIT that is created by a computer program '
                       'to demonstrate how Mechanical Turk works. This should '
                       'be a free HIT for the worker.')
    hit_keywords = 'data collection, favorite, color'
    work_seconds = 15 * 60  # 15 minutes allowed to work on the HIT
    assignments_per_hit = 1
    reward = 0.00  # $0.00 USD -- the HIT pays nothing

    # ---- Overview shown at the top of the form ----
    overview = Overview()
    overview.append_field('Title', hit_title)
    overview.append(
        FormattedContent(
            "<p>This is an experiment to learn Mechanical Turk</p>"))

    # ---- Question 1: favorite color (required, single line) ----
    color_content = QuestionContent()
    color_content.append(
        FormattedContent(
            "<b>What is your favorite color?</b> There isn't a financial "
            "reward for answering, but you will get an easy approval for your "
            "statistics."))
    color_answer = FreeTextAnswer(num_lines=1)
    color_question = Question(
        identifier='favorite_color',
        content=color_content,
        answer_spec=AnswerSpecification(color_answer),
        is_required=True)

    # ---- Question 2: free-form comment (is_required left at its default) ----
    comment_content = QuestionContent()
    comment_content.append(
        FormattedContent("""<p>Give me a fun comment:</p>"""))
    comment_question = Question(
        identifier="comments",
        content=comment_content,
        answer_spec=AnswerSpecification(FreeTextAnswer()))

    # ---- Assemble the form in display order ----
    question_form = QuestionForm()
    for element in (overview, color_question, comment_question):
        question_form.append(element)

    # ---- Create the HIT ----
    mtc = get_connection()
    created = mtc.create_hit(questions=question_form,
                             max_assignments=assignments_per_hit,
                             title=hit_title,
                             description=hit_description,
                             keywords=hit_keywords,
                             duration=work_seconds,
                             reward=reward)

    # ---- Print a link to the HIT group ----
    base = get_worker_url()

    # Single-argument parenthesised print gives the same output whether
    # print is a statement or a function.
    print("\nVisit this website to see the HIT that was created:")
    new_hit = created[0]
    print("%s/mturk/preview?groupId=%s" % (base, new_hit.HITTypeId))

    return new_hit