Exemple #1
0
def upload_questions_to_crowdflower(title,
                                    job_distinguish_tag,
                                    question_content,
                                    max_judgments_per_worker=50,
                                    units_per_assignment=3,
                                    judgments_per_unit=1,
                                    payment_cents=3):
    """Upload ``question_content`` as a job, configure, tag and launch it.

    The job shows each unit's ``{{content}}`` followed by one required radio
    question named ``option`` whose values are ``same`` and ``different``.

    Args:
        title: Job title.
        job_distinguish_tag: Tag stored as the job's only tag.
        question_content: Data passed unchanged to ``conn.upload``; its
            length is the number of units launched.
        max_judgments_per_worker: Forwarded to the job settings.
        units_per_assignment: Forwarded to the job settings.
        judgments_per_unit: Forwarded to the job settings.
        payment_cents: Forwarded to the job settings.
    """
    instructions_html = '''
            <h3><em><strong>Semantic Discrimination</strong>&nbsp;</em></h3>
            <p><em><strong>Judge whether the two paragraphs describe the same object or not.</strong></em></p>
            '''
    radio_form_cml = '''
            <p>{{content}}</p>
            <cml:radios label="Are these two paragraphs describe the same object?" name="option" validates="required" gold="true">
                <cml:radio label="Yes, they describe the same object" value="same"></cml:radio>
                <cml:radio label="No, they describe the different object" value="different"></cml:radio>
            </cml:radios>
            '''

    connection = crowdflower.Connection(api_key=api_key.api_key_string)
    uploaded_job = connection.upload(question_content)

    job_settings = {
        'title': title,
        'max_judgments_per_worker': max_judgments_per_worker,
        'units_per_assignment': units_per_assignment,
        'judgments_per_unit': judgments_per_unit,
        'payment_cents': payment_cents,
        'instructions': instructions_html,
        'cml': radio_form_cml,
        # front_load: 1 enables quiz mode, 0 turns it off.
        'options': {'front_load': 0},
    }
    uploaded_job.update(job_settings)

    uploaded_job.tags = [job_distinguish_tag]
    unit_count = len(question_content)
    uploaded_job.launch(unit_count, channels=('on_demand', 'cf_internal'))
def collect_answers_from_crowdflower(job_distinguish_tag):
    """Download and parse the answers of the first job carrying the tag.

    The job's CSV report is saved as ``cf_temp/<job id>.csv`` next to this
    file and then read back. Only the first job whose tags contain
    ``job_distinguish_tag`` is processed.

    Args:
        job_distinguish_tag: Tag identifying the job to collect.

    Returns:
        ``(job_distinguish_tag, result)`` where ``result`` is a list of
        ``(unique_id, worker_id, answer)`` tuples. ``answer`` is the second
        ``_``-separated piece of the ``category`` cell.

    Raises:
        KeyError: If a data row is present but the header has no ``id`` or
            ``category`` column.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    result = []
    out_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'cf_temp')
    for job in conn.jobs():
        if job_distinguish_tag not in job.tags:
            continue
        csv_path = os.path.join(out_dir, str(job.id) + '.csv')
        job.download_csv(csv_path)
        # The with-block guarantees the report file is closed again; binary
        # mode is what the Python 2 csv module expects.
        with open(csv_path, 'rb') as csv_file:
            reader = csv.reader(csv_file)
            header = next(reader, None)
            if header is not None:
                # A later duplicate header name wins, as in a sequential scan.
                column_of = dict(
                    (name, index) for index, name in enumerate(header))
                for line in reader:
                    # NOTE(review): the worker id is read from fixed column 7;
                    # confirm this matches the report layout.
                    worker_id = line[7]
                    unique_id = line[column_of['id']]
                    option_single_answer = (
                        line[column_of['category']].split('_')[1])
                    result.append(
                        (unique_id, worker_id, option_single_answer))
        # Only the first matching job is collected.
        break
    return job_distinguish_tag, result
Exemple #3
0
def collect_answers_from_crowdflower(job_distinguish_tag):
    """Download and parse same/different ratings of every job with the tag.

    Each matching job's CSV report is saved as ``cf_temp/<job id>.csv`` next
    to this file and read back. The ``option`` answer is mapped to a score:
    ``same`` -> 1, ``different`` -> 0. Rows with any other answer are skipped
    because no score is defined for them.

    Args:
        job_distinguish_tag: Tag identifying the jobs to collect.

    Returns:
        ``(job_distinguish_tag, rating_result)`` where ``rating_result`` is a
        list of ``[task_id, worker_id, score]`` lists.

    Raises:
        KeyError: If a data row is present but the header has no ``id`` or
            ``option`` column.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    rating_result = []
    out_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'cf_temp')
    score_of_option = {'same': 1, 'different': 0}
    for job in conn.jobs():
        if job_distinguish_tag not in job.tags:
            continue
        csv_path = os.path.join(out_dir, str(job.id) + '.csv')
        job.download_csv(csv_path)
        # The with-block guarantees the report file is closed again; binary
        # mode is what the Python 2 csv module expects.
        with open(csv_path, 'rb') as csv_file:
            reader = csv.reader(csv_file)
            header = next(reader, None)
            if header is None:
                continue
            # Column positions come from the header row only, so a data cell
            # that happens to contain "id" or "option" cannot shift them.
            column_of = dict(
                (name, index) for index, name in enumerate(header))
            for line in reader:
                task_id = line[column_of['id']]
                option = line[column_of['option']]
                if option not in score_of_option:
                    # Unknown or empty answer: skip it rather than reuse the
                    # score of the previous row.
                    continue
                # NOTE(review): the worker id is read from fixed column 7;
                # confirm this matches the report layout.
                worker_id = line[7]
                rating = [task_id, worker_id, score_of_option[option]]
                rating_result.append(rating)
                # Parenthesised single argument prints the same text on
                # Python 2 and Python 3.
                print(rating)
    return job_distinguish_tag, rating_result
Exemple #4
0
def query_the_status_of_the_job(job_distinguish_tag):
    """Report whether the first job carrying the tag is finished.

    The state of the first job whose tags contain ``job_distinguish_tag`` is
    printed. When no job carries the tag, ``None`` is printed and the job is
    reported as not finished.

    Args:
        job_distinguish_tag: Tag identifying the job to look up.

    Returns:
        ``True`` if the job's ``state`` property equals ``'finished'``,
        otherwise ``False``.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    # Stays None when no job matches, so the lines below never hit an
    # unbound local.
    status_of_the_job = None
    for job in conn.jobs():
        if job_distinguish_tag in job.tags:
            status_of_the_job = job.properties['state']
            break
    # Parenthesised single argument prints the same text on Python 2 and 3.
    print(status_of_the_job)
    return status_of_the_job == 'finished'
Exemple #5
0
def upload_questions_to_crowdflower(title,
                                    job_distinguish_tag,
                                    question_content,
                                    max_judgments_per_worker=50,
                                    units_per_assignment=3,
                                    judgments_per_unit=1,
                                    payment_cents=3):
    """Upload fill-in questions as a job, configure, tag and launch it.

    The ``columns`` entry of the first question lists the attributes to ask
    for. Every question is uploaded without its ``columns`` entry and with one
    extra field per attribute, keyed ``<attribute>answer_attr<position>``.
    The form shows ``{{content}}`` and, per attribute, that field followed by
    a required text input named ``fill_<position>1``.

    Args:
        title: Job title.
        job_distinguish_tag: Tag stored as the job's only tag.
        question_content: Sequence of question dicts; the first one must
            contain ``columns``.
        max_judgments_per_worker: Forwarded to the job settings.
        units_per_assignment: Forwarded to the job settings.
        judgments_per_unit: Forwarded to the job settings.
        payment_cents: Forwarded to the job settings.
    """
    connection = crowdflower.Connection(api_key=api_key.api_key_string)
    columns = question_content[0]['columns']

    # One upload row per question: the original fields minus 'columns', plus
    # one field per requested attribute.
    rows_for_upload = []
    for question in question_content:
        row = dict(
            (key, question[key]) for key in question if key != 'columns')
        for position, column in enumerate(columns):
            row[str(column) + "answer_attr" + str(position)] = column
        rows_for_upload.append(row)
    uploaded_job = connection.upload(rows_for_upload)

    # Form markup: the content paragraph, then one label and one text input
    # per attribute.
    widgets = ["<p>{{content}}</p>"]
    for position, column in enumerate(columns):
        prompt = "<p>{{" + column + "answer_attr" + str(position) + "}}</p>"
        text_input = (
            '''<cml:text label="" validates="required" gold="true" name = "fill_'''
            + str(position) + '''1" />''')
        widgets.append(prompt + text_input)
    form_cml = "".join(widgets)

    instructions_html = '''
            <h3><em><strong>Semantic Discrimination</strong>&nbsp;</em></h3>
            <p><em><strong>Judge whether the two paragraphs describe the same object or not.</strong></em></p>
            '''
    job_settings = {
        'title': title,
        'max_judgments_per_worker': max_judgments_per_worker,
        'units_per_assignment': units_per_assignment,
        'judgments_per_unit': judgments_per_unit,
        'payment_cents': payment_cents,
        'instructions': instructions_html,
        'cml': form_cml,
        # front_load: 1 enables quiz mode, 0 turns it off.
        'options': {'front_load': 0},
    }
    uploaded_job.update(job_settings)

    uploaded_job.tags = [job_distinguish_tag]
    unit_count = len(rows_for_upload)
    uploaded_job.launch(unit_count, channels=('on_demand', 'cf_internal'))
def collect_answers_from_crowdflower(job_distinguish_tag):
    """Download and parse fill-in answers of every job carrying the tag.

    Each matching job's CSV report is saved as ``cf_temp/<job id>.csv`` next
    to this file and read back. Header columns named
    ``<attribute>answer_attr<N>`` give the attribute name for field ``N``
    (``_dot_`` in the name is turned back into ``.``). Non-gold columns
    ending in ``fill_<N>1`` hold the worker's answer for field ``N``.

    Args:
        job_distinguish_tag: Tag identifying the jobs to collect.

    Returns:
        ``(job_distinguish_tag, result)`` where ``result`` is a list of
        ``(unique_id, worker_id, attr_answers)`` tuples and ``attr_answers``
        maps attribute name to the answer text.

    Raises:
        KeyError: If a data row is present but the header has no ``id``
            column, or an answer column has no matching attribute column.
    """
    import re

    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    result = []
    out_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'cf_temp')
    # Field names are built as "fill_" + str(N) + "1"; capture N in full so
    # that N >= 10 is read correctly instead of just its last digit.
    fill_column = re.compile(r'fill_(\d+)1$')
    for job in conn.jobs():
        if job_distinguish_tag not in job.tags:
            continue
        csv_path = os.path.join(out_dir, str(job.id) + '.csv')
        job.download_csv(csv_path)
        # The with-block guarantees the report file is closed again; binary
        # mode is what the Python 2 csv module expects.
        with open(csv_path, 'rb') as csv_file:
            reader = csv.reader(csv_file)
            header = next(reader, None)
            if header is None:
                continue

            column_of = dict(
                (name, index) for index, name in enumerate(header))
            col_list = {}   # field number -> attribute name
            fill_list = []  # (field number, csv column index)
            for index, item in enumerate(header):
                if "answer_attr" in item:
                    name_part, attr_string = item.split("answer_attr")[:2]
                    if '_dot_' in item:
                        dotted = name_part.split('_dot_')
                        name_part = dotted[0] + '.' + dotted[1]
                    col_list[int(attr_string)] = name_part
                if 'gold' not in item:
                    matched = fill_column.search(item)
                    if matched:
                        fill_list.append((int(matched.group(1)), index))

            for line in reader:
                # NOTE(review): the worker id is read from fixed column 7;
                # confirm this matches the report layout.
                worker_id = line[7]
                unique_id = line[column_of['id']]
                # Pair each answer with its attribute through the field
                # number, not through the order of columns in the CSV.
                attr_answers = {}
                for number, index in fill_list:
                    attr_answers[col_list[number]] = line[index]
                result.append((unique_id, worker_id, attr_answers))
    return job_distinguish_tag, result
def upload_questions_to_crowdflower(title,
                                    job_distinguish_tag,
                                    question_content,
                                    max_judgments_per_worker=50,
                                    units_per_assignment=3,
                                    judgments_per_unit=1,
                                    payment_cents=3):
    """Upload multiple-choice questions as a job, configure, tag and launch.

    The form shows the unit's ``{{url}}`` image and ``{{content}}`` followed
    by a required radio group named ``category``. There is one radio per
    entry in the first question's ``options``; each radio's value is
    ``fill_<option>``.

    Args:
        title: Job title.
        job_distinguish_tag: Tag stored as the job's only tag.
        question_content: Sequence of question dicts passed unchanged to
            ``conn.upload``; the first one must contain ``options``.
        max_judgments_per_worker: Forwarded to the job settings.
        units_per_assignment: Forwarded to the job settings.
        judgments_per_unit: Forwarded to the job settings.
        payment_cents: Forwarded to the job settings.
    """
    connection = crowdflower.Connection(api_key=api_key.api_key_string)
    uploaded_job = connection.upload(question_content)
    choices = question_content[0]['options']

    radios_open = '''
    <img src="{{url}}" id="">
    <p>{{content}}</p>
    <cml:radios validates="required" label="Choose the best option" name="category" class="">
    '''
    # One <cml:radio> line per choice, each terminated by a newline.
    radio_lines = []
    for choice in choices:
        radio = ('''<cml:radio label="''' + str(choice)
                 + '''"  value = "fill_''' + str(choice) + '''" />''')
        radio_lines.append(radio + '\n')
    form_cml = radios_open + "".join(radio_lines) + '''</cml:radios>'''

    instructions_html = '''
            <h3><em><strong>Tell us the details about the country</strong>&nbsp;</em></h3>
            '''
    job_settings = {
        'title': title,
        'max_judgments_per_worker': max_judgments_per_worker,
        'units_per_assignment': units_per_assignment,
        'judgments_per_unit': judgments_per_unit,
        'payment_cents': payment_cents,
        'instructions': instructions_html,
        'cml': form_cml,
        # front_load: 1 enables quiz mode, 0 turns it off.
        'options': {'front_load': 0},
    }
    uploaded_job.update(job_settings)

    uploaded_job.tags = [job_distinguish_tag]
    unit_count = len(question_content)
    uploaded_job.launch(unit_count, channels=('on_demand', 'cf_internal'))
def upload_questions_to_crowdflower(title, job_distinguish_tag,
                                    question_content, max_judgments_per_worker,
                                    units_per_assignment, judgments_per_unit,
                                    payment_cents):
    """Upload repeated fill-in questions as a job, configure, tag and launch.

    The ``columns`` entry of the first question lists the attributes to ask
    for. Every question is turned into one row (its fields minus ``columns``,
    one field per attribute, and ``number_counting`` set to its position),
    and that row is uploaded ``limit * repeats`` times. A dotted attribute
    ``a.b`` gets the field name ``a_dot_banswer_attr<position>`` with value
    ``b``; any other attribute gets ``<attribute>answer_attr<position>`` with
    the attribute itself as value. The form shows ``{{content}}`` and, per
    attribute, that field followed by a required text input named
    ``fill_<position>1``.

    Args:
        title: Job title.
        job_distinguish_tag: Tag stored as the job's only tag.
        question_content: Sequence of question dicts, each with ``limit`` and
            ``repeats``; the first one must contain ``columns``.
        max_judgments_per_worker: Forwarded to the job settings.
        units_per_assignment: Forwarded to the job settings.
        judgments_per_unit: Forwarded to the job settings.
        payment_cents: Forwarded to the job settings.
    """
    connection = crowdflower.Connection(api_key=api_key.api_key_string)
    columns = question_content[0]['columns']

    rows_for_upload = []
    for question_number, question in enumerate(question_content):
        copies = int(question['limit']) * int(question['repeats'])
        row = dict(
            (key, question[key]) for key in question if key != 'columns')
        for position, column in enumerate(columns):
            if '.' in column:
                pieces = column.split('.')
                field = (str(pieces[0]) + '_dot_' + str(pieces[1]) +
                         'answer_attr' + str(position))
                row[field] = pieces[1]
            else:
                row[str(column) + 'answer_attr' + str(position)] = column
        row['number_counting'] = str(question_number)
        # The very same row object is listed once per requested copy.
        rows_for_upload.extend([row] * copies)

    uploaded_job = connection.upload(rows_for_upload)

    # Form markup: the content paragraph, then one label and one text input
    # per attribute.
    widgets = ["<p>{{content}}</p>"]
    for position, column in enumerate(columns):
        if '.' in column:
            pieces = column.split('.')
            placeholder = (str(pieces[0]) + '_dot_' + str(pieces[1]) +
                           'answer_attr' + str(position))
        else:
            placeholder = column + "answer_attr" + str(position)
        prompt = "<p>{{" + placeholder + "}}</p>"
        text_input = (
            '''<cml:text label="" validates="required" gold="true" name = "fill_'''
            + str(position) + '''1" />''')
        widgets.append(prompt + text_input)
    form_cml = "".join(widgets)

    instructions_html = '''
            <h3><em><strong>Tell us the details about the country</strong>&nbsp;</em></h3>
            '''
    job_settings = {
        'title': title,
        'max_judgments_per_worker': max_judgments_per_worker,
        'units_per_assignment': units_per_assignment,
        'judgments_per_unit': judgments_per_unit,
        'payment_cents': payment_cents,
        'instructions': instructions_html,
        'cml': form_cml,
        # front_load: 1 enables quiz mode, 0 turns it off.
        'options': {'front_load': 0},
    }
    uploaded_job.update(job_settings)

    uploaded_job.tags = [job_distinguish_tag]
    unit_count = len(rows_for_upload)
    uploaded_job.launch(unit_count, channels=('on_demand', 'cf_internal'))
Exemple #9
0
import os
import sys
import json
import random
import crowdflower
from crowdflower.exception import CrowdFlowerError

# Module-level connection used by _find_job() below.
# Expects the API key to be available in your environment variables; does not
# use a cache.
conn = crowdflower.Connection()
# Tag that _find_job() looks for in each job's tags.
job_tag = 'python-example'


def _find_job():
    """Return the first job from ``conn.jobs()`` tagged ``job_tag``, else None."""
    tagged_jobs = (
        candidate for candidate in conn.jobs() if job_tag in candidate.tags)
    # The generator is lazy, so iteration stops at the first tagged job.
    return next(tagged_jobs, None)


def create():
    filename = os.path.join(crowdflower.root, 'examples', 'spam.txt')

    def iter_data(labels=None):
        for i, line in enumerate(open(filename), 1):
            label, text = line.strip().split('\t', 1)
            if labels is None or label in labels:
                # the "text" key is the only required field; the others just help us
                # keep track of what's been annotated and mark which are the test data
                yield {
                    'id': '{}:{}'.format(filename, i),
                    'text': text,
                    'label': label