def upload_questions_to_crowdflower(title,
                                    job_distinguish_tag,
                                    question_content,
                                    max_judgments_per_worker=50,
                                    units_per_assignment=3,
                                    judgments_per_unit=1,
                                    payment_cents=3):
    """Upload, configure, tag and launch a same/different judgment job.

    The uploaded job shows ``{{content}}`` followed by a required radio
    group named ``option`` whose values are ``same`` and ``different``.

    Args:
        title: Job title sent in the job settings.
        job_distinguish_tag: Tag set on the job (as its only tag) so that
            it can be located again later.
        question_content: Data handed unchanged to ``conn.upload``; its
            length is the unit count passed to ``job.launch``.
        max_judgments_per_worker: Forwarded to the job settings.
        units_per_assignment: Forwarded to the job settings.
        judgments_per_unit: Forwarded to the job settings.
        payment_cents: Forwarded to the job settings.
    """
    instructions_html = ''' <h3><em><strong>Semantic Discrimination</strong> </em></h3> <p><em><strong>Judge whether the two paragraphs describe the same object or not.</strong></em></p> '''
    radios_cml = ''' <p>{{content}}</p> <cml:radios label="Are these two paragraphs describe the same object?" name="option" validates="required" gold="true"> <cml:radio label="Yes, they describe the same object" value="same"></cml:radio> <cml:radio label="No, they describe the different object" value="different"></cml:radio> </cml:radios> '''

    job_settings = {
        'title': title,
        'max_judgments_per_worker': max_judgments_per_worker,
        'units_per_assignment': units_per_assignment,
        'judgments_per_unit': judgments_per_unit,
        'payment_cents': payment_cents,
        'instructions': instructions_html,
        'cml': radios_cml,
        'options': {
            'front_load': 0,  # quiz mode = 1; turn off with 0
        },
    }

    connection = crowdflower.Connection(api_key=api_key.api_key_string)
    uploaded_job = connection.upload(question_content)
    uploaded_job.update(job_settings)
    uploaded_job.tags = [job_distinguish_tag]
    unit_count = len(question_content)
    uploaded_job.launch(unit_count, channels=('on_demand', 'cf_internal'))
def collect_answers_from_crowdflower(job_distinguish_tag):
    """Download and parse the answers of the first job carrying a tag.

    The job's CSV report is saved as ``cf_temp/<job id>.csv`` next to this
    file. The header row locates the ``id`` and ``category`` columns; every
    following row yields one ``(unique_id, worker_id, answer)`` tuple,
    where ``answer`` is the ``category`` cell with everything up to and
    including its first underscore removed.

    Args:
        job_distinguish_tag: Tag identifying the job to collect.

    Returns:
        A ``(job_distinguish_tag, result)`` tuple; ``result`` is the list
        of answer tuples and is empty when no job carries the tag or the
        report has no data rows.

    Raises:
        KeyError: If the report header has no ``id`` or ``category`` column.
        IndexError: If a ``category`` cell contains no underscore.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    result = []
    out_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'cf_temp')
    # NOTE(review): the worker id is read from a fixed column position;
    # confirm against the report layout actually produced by CrowdFlower.
    worker_id_column = 7

    for job in conn.jobs():
        # if job_distinguish_tag in job.tags and job.properties['state'] == 'finished':
        if job_distinguish_tag not in job.tags:
            continue

        csv_path = os.path.join(out_dir, str(job.id) + '.csv')
        job.download_csv(csv_path)

        # 'rb' is the mode the Python 2 csv module expects; the context
        # manager guarantees the handle is closed (it used to be leaked).
        with open(csv_path, 'rb') as csv_file:
            reader = csv.reader(csv_file)
            header = next(reader, None)
            if header is not None:
                # A repeated column name resolves to its last occurrence.
                column_of = {name: idx for idx, name in enumerate(header)}
                id_index = column_of['id']
                category_index = column_of['category']
                for row in reader:
                    # Split on the first underscore only, so an option such
                    # as "fill_New_York" is returned whole as "New_York".
                    answer = row[category_index].split('_', 1)[1]
                    result.append(
                        (row[id_index], row[worker_id_column], answer))

        # Only the first job carrying the tag is read.
        break

    return job_distinguish_tag, result
def collect_answers_from_crowdflower(job_distinguish_tag):
    """Download and parse same/different ratings of every job with a tag.

    Each matching job's CSV report is saved as ``cf_temp/<job id>.csv``
    next to this file. The header row locates the ``id`` and ``option``
    columns; every following row yields ``[task_id, worker_id, rating]``
    where ``rating`` is 1 for ``same``, 0 for ``different`` and ``None``
    for any other cell value. Each parsed rating is also printed.

    Args:
        job_distinguish_tag: Tag identifying the jobs to collect.

    Returns:
        A ``(job_distinguish_tag, rating_result)`` tuple; ``rating_result``
        is the list of rating triples across all matching jobs.

    Raises:
        KeyError: If a report header has no ``id`` or ``option`` column.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    rating_result = []
    out_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'cf_temp')
    rating_of = {'same': 1, 'different': 0}
    # NOTE(review): the worker id is read from a fixed column position;
    # confirm against the report layout actually produced by CrowdFlower.
    worker_id_column = 7

    for job in conn.jobs():
        if job_distinguish_tag not in job.tags:
            continue

        csv_path = os.path.join(out_dir, str(job.id) + '.csv')
        job.download_csv(csv_path)

        # 'rb' is the mode the Python 2 csv module expects; the context
        # manager guarantees the handle is closed (it used to be leaked).
        with open(csv_path, 'rb') as csv_file:
            reader = csv.reader(csv_file)
            header = next(reader, None)
            if header is None:
                continue

            # Column positions come from the header row only. Scanning
            # every row let a data cell equal to "id" or "option" silently
            # redirect the lookups for the rest of the file.
            column_of = {name: idx for idx, name in enumerate(header)}
            id_index = column_of['id']
            option_index = column_of['option']

            for row in reader:
                # .get() yields None for an unrecognised answer rather
                # than reusing the previous row's rating.
                record = [row[id_index],
                          row[worker_id_column],
                          rating_of.get(row[option_index])]
                rating_result.append(record)
                print(record)

    return job_distinguish_tag, rating_result
def query_the_status_of_the_job(job_distinguish_tag):
    """Report whether the first job carrying a tag is finished.

    The state of the first matching job (its ``properties['state']``) is
    printed; ``None`` is printed when no job carries the tag.

    Args:
        job_distinguish_tag: Tag identifying the job to inspect.

    Returns:
        True when the matching job's state equals ``'finished'``; False
        otherwise, including when no job carries the tag.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)

    # Start from None so a missing job is reported as "not finished"
    # instead of failing on an unbound local below.
    status_of_the_job = None
    for job in conn.jobs():
        if job_distinguish_tag in job.tags:
            status_of_the_job = job.properties['state']
            break

    print(status_of_the_job)
    return status_of_the_job == 'finished'
def upload_questions_to_crowdflower(title,
                                    job_distinguish_tag,
                                    question_content,
                                    max_judgments_per_worker=50,
                                    units_per_assignment=3,
                                    judgments_per_unit=1,
                                    payment_cents=3):
    """Upload, configure, tag and launch a free-text attribute job.

    Every question is copied without its ``columns`` entry and extended
    with one ``<column>answer_attr<n>`` key per column listed in the FIRST
    question's ``columns``. The form shows ``{{content}}`` and then, per
    column, that key's placeholder followed by a required text field named
    ``fill_<n>1``.

    Args:
        title: Job title sent in the job settings.
        job_distinguish_tag: Tag set on the job (as its only tag).
        question_content: Non-empty sequence of dicts; the first one must
            have a ``columns`` entry.
        max_judgments_per_worker: Forwarded to the job settings.
        units_per_assignment: Forwarded to the job settings.
        judgments_per_unit: Forwarded to the job settings.
        payment_cents: Forwarded to the job settings.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    columns_content = question_content[0]['columns']

    # One upload row per question: original fields minus 'columns', plus a
    # numbered key for each requested column.
    refined_question_content = []
    for question in question_content:
        row = {key: value
               for key, value in question.items() if key != 'columns'}
        for position, column in enumerate(columns_content):
            row[str(column) + "answer_attr" + str(position)] = column
        refined_question_content.append(row)

    job = conn.upload(refined_question_content)

    # Form markup: the content paragraph, then a label + text box per column.
    cml_parts = ["<p>{{content}}</p>"]
    for position, column in enumerate(columns_content):
        cml_parts.append(
            "<p>{{" + column + "answer_attr" + str(position) + "}}</p>")
        cml_parts.append(
            '''<cml:text label="" validates="required" gold="true" name = "fill_''' + str(position) + '''1" />''')
    question_strings = "".join(cml_parts)

    instructions_html = ''' <h3><em><strong>Semantic Discrimination</strong> </em></h3> <p><em><strong>Judge whether the two paragraphs describe the same object or not.</strong></em></p> '''
    job.update({
        'title': title,
        'max_judgments_per_worker': max_judgments_per_worker,
        'units_per_assignment': units_per_assignment,
        'judgments_per_unit': judgments_per_unit,
        'payment_cents': payment_cents,
        'instructions': instructions_html,
        'cml': question_strings,
        'options': {
            'front_load': 0,  # quiz mode = 1; turn off with 0
        },
    })
    job.tags = [job_distinguish_tag]
    job.launch(len(refined_question_content),
               channels=('on_demand', 'cf_internal'))
def collect_answers_from_crowdflower(job_distinguish_tag):
    """Download and parse free-text attribute answers of every tagged job.

    Each matching job's CSV report is saved as ``cf_temp/<job id>.csv``
    next to this file. The header row provides:

    * the ``id`` column;
    * attribute names, from columns named ``<name>answer_attr<n>`` (a
      ``_dot_`` in ``<name>`` is turned back into ``.`` between its first
      two parts);
    * answer columns, i.e. names containing ``fill`` but not ``gold`` and
      ending in ``fill_<n>1``.

    Every following row yields ``(unique_id, worker_id, attr_answers)``
    where ``attr_answers`` maps attribute name ``<n>`` to the text entered
    in field ``<n>``.

    Args:
        job_distinguish_tag: Tag identifying the jobs to collect.

    Returns:
        A ``(job_distinguish_tag, result)`` tuple; ``result`` is the list
        of answer tuples across all matching jobs.

    Raises:
        KeyError: If a report header has no ``id`` column, or an answer
            field number has no matching ``answer_attr`` column.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    result = []
    out_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'cf_temp')
    # NOTE(review): the worker id is read from a fixed column position;
    # confirm against the report layout actually produced by CrowdFlower.
    worker_id_column = 7

    for job in conn.jobs():
        if job_distinguish_tag not in job.tags:
            continue

        csv_path = os.path.join(out_dir, str(job.id) + '.csv')
        job.download_csv(csv_path)

        # 'rb' is the mode the Python 2 csv module expects; the context
        # manager guarantees the handle is closed (it used to be leaked).
        with open(csv_path, 'rb') as csv_file:
            reader = csv.reader(csv_file)
            header = next(reader, None)
            if header is None:
                continue

            # A repeated column name resolves to its last occurrence.
            id_index = {name: idx for idx, name in enumerate(header)}['id']

            # Field number -> attribute name.
            attr_names = {}
            for name in header:
                if 'answer_attr' not in name:
                    continue
                pieces = name.split('answer_attr')
                field_number = int(pieces[1])
                if '_dot_' in name:
                    dotted = pieces[0].split('_dot_')
                    attr_names[field_number] = dotted[0] + '.' + dotted[1]
                else:
                    attr_names[field_number] = pieces[0]

            # (field number, CSV column) for every answer column.
            fill_columns = []
            for idx, name in enumerate(header):
                if 'fill' not in name or 'gold' in name:
                    continue
                # Field names are "fill_<n>1": drop the trailing "1" and
                # keep ALL digits of <n>, so fields 10 and above are parsed
                # correctly. Columns without that shape are not answers.
                digits = name.rpartition('fill_')[2][:-1]
                if digits.isdigit():
                    fill_columns.append((int(digits), idx))

            for row in reader:
                # Pair each answer with its attribute by field number, not
                # by the order the columns happen to appear in the report.
                attr_answers = {
                    attr_names[field_number]: row[idx]
                    for field_number, idx in fill_columns
                }
                result.append(
                    (row[id_index], row[worker_id_column], attr_answers))

    return job_distinguish_tag, result
def upload_questions_to_crowdflower(title,
                                    job_distinguish_tag,
                                    question_content,
                                    max_judgments_per_worker=50,
                                    units_per_assignment=3,
                                    judgments_per_unit=1,
                                    payment_cents=3):
    """Upload, configure, tag and launch a single-choice job.

    The form shows the ``{{url}}`` image and ``{{content}}``, followed by a
    required radio group named ``category`` with one radio per entry of the
    FIRST question's ``options``; each radio's value is ``fill_<option>``.

    Args:
        title: Job title sent in the job settings.
        job_distinguish_tag: Tag set on the job (as its only tag).
        question_content: Data handed unchanged to ``conn.upload``; its
            first element must have an ``options`` entry and its length is
            the unit count passed to ``job.launch``.
        max_judgments_per_worker: Forwarded to the job settings.
        units_per_assignment: Forwarded to the job settings.
        judgments_per_unit: Forwarded to the job settings.
        payment_cents: Forwarded to the job settings.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    # The data is uploaded before the options are read, as in the original.
    job = conn.upload(question_content)
    options = question_content[0]['options']

    radios_open = ''' <img src="{{url}}" id=""> <p>{{content}}</p> <cml:radios validates="required" label="Choose the best option" name="category" class=""> '''
    radio_tags = []
    for option in options:
        option_text = str(option)
        radio_tags.append(
            '''<cml:radio label="''' + option_text + '''" value = "fill_''' + option_text + '''" />''' + '\n')
    question_strings = radios_open + ''.join(radio_tags) + '''</cml:radios>'''

    instructions_html = ''' <h3><em><strong>Tell us the details about the country</strong> </em></h3> '''
    job.update({
        'title': title,
        'max_judgments_per_worker': max_judgments_per_worker,
        'units_per_assignment': units_per_assignment,
        'judgments_per_unit': judgments_per_unit,
        'payment_cents': payment_cents,
        'instructions': instructions_html,
        'cml': question_strings,
        'options': {
            'front_load': 0,  # quiz mode = 1; turn off with 0
        },
    })
    job.tags = [job_distinguish_tag]
    job.launch(len(question_content), channels=('on_demand', 'cf_internal'))
def upload_questions_to_crowdflower(title,
                                    job_distinguish_tag,
                                    question_content,
                                    max_judgments_per_worker,
                                    units_per_assignment,
                                    judgments_per_unit,
                                    payment_cents):
    """Upload, configure, tag and launch a repeated free-text attribute job.

    Every question is copied without its ``columns`` entry, extended with
    one ``<column>answer_attr<n>`` key per column of the FIRST question's
    ``columns`` (a dotted column ``a.b`` becomes ``a_dot_banswer_attr<n>``
    with value ``b``), stamped with ``number_counting`` (its position as a
    string) and uploaded ``limit * repeats`` times. The form shows
    ``{{content}}`` and then, per column, that key's placeholder followed
    by a required text field named ``fill_<n>1``.

    Args:
        title: Job title sent in the job settings.
        job_distinguish_tag: Tag set on the job (as its only tag).
        question_content: Non-empty sequence of dicts, each with ``limit``
            and ``repeats`` entries; the first one must have ``columns``.
        max_judgments_per_worker: Forwarded to the job settings.
        units_per_assignment: Forwarded to the job settings.
        judgments_per_unit: Forwarded to the job settings.
        payment_cents: Forwarded to the job settings.
    """
    conn = crowdflower.Connection(api_key=api_key.api_key_string)
    columns_content = question_content[0]['columns']

    refined_question_content = []
    for position, question in enumerate(question_content):
        limit = int(question['limit'])
        repeats = int(question['repeats'])

        row = {key: value
               for key, value in question.items() if key != 'columns'}
        for count, column in enumerate(columns_content):
            if '.' in column:
                pieces = column.split('.')
                row[str(pieces[0]) + '_dot_' + str(pieces[1]) +
                    'answer_attr' + str(count)] = pieces[1]
            else:
                row[str(column) + 'answer_attr' + str(count)] = column
        row['number_counting'] = str(position)

        # The very same dict object is listed once per requested repeat.
        refined_question_content.extend([row] * (limit * repeats))

    job = conn.upload(refined_question_content)

    # Form markup: the content paragraph, then a label + text box per column.
    cml_parts = ["<p>{{content}}</p>"]
    for position, column in enumerate(columns_content):
        if '.' in column:
            pieces = column.split('.')
            placeholder = str(pieces[0]) + '_dot_' + str(pieces[1])
        else:
            placeholder = column
        cml_parts.append(
            "<p>{{" + placeholder + 'answer_attr' + str(position) + "}}</p>")
        cml_parts.append(
            '''<cml:text label="" validates="required" gold="true" name = "fill_''' + str(position) + '''1" />''')
    question_strings = ''.join(cml_parts)

    instructions_html = ''' <h3><em><strong>Tell us the details about the country</strong> </em></h3> '''
    job.update({
        'title': title,
        'max_judgments_per_worker': max_judgments_per_worker,
        'units_per_assignment': units_per_assignment,
        'judgments_per_unit': judgments_per_unit,
        'payment_cents': payment_cents,
        'instructions': instructions_html,
        'cml': question_strings,
        'options': {
            'front_load': 0,  # quiz mode = 1; turn off with 0
        },
    })
    job.tags = [job_distinguish_tag]
    job.launch(len(refined_question_content),
               channels=('on_demand', 'cf_internal'))
import os import sys import json import random import crowdflower from crowdflower.exception import CrowdFlowerError # expects api key to be available in your environment variables; does not use cache conn = crowdflower.Connection() job_tag = 'python-example' def _find_job(): for job in conn.jobs(): if job_tag in job.tags: return job def create(): filename = os.path.join(crowdflower.root, 'examples', 'spam.txt') def iter_data(labels=None): for i, line in enumerate(open(filename), 1): label, text = line.strip().split('\t', 1) if labels is None or label in labels: # the "text" key is the only required field; the others just help us # keep track of what's been annotated and mark which are the test data yield { 'id': '{}:{}'.format(filename, i), 'text': text, 'label': label