def delete_hit(hit_id):
    """Expire a HIT on Mechanical Turk and then dispose of it.

    The endpoint is selected by the module-level ``SANDBOX`` flag and the
    connection is authenticated with ``AWS_ACCESS_KEY`` / ``AWS_SECRET_KEY``.

    Args:
        hit_id: Identifier of the HIT to delete. It is concatenated into the
            confirmation message, so it must be a string.
    """
    if SANDBOX:
        mturk_url = 'mechanicalturk.sandbox.amazonaws.com'
    else:
        mturk_url = 'mechanicalturk.amazonaws.com'

    conn = MTurkConnection(aws_access_key_id=AWS_ACCESS_KEY,
                           aws_secret_access_key=AWS_SECRET_KEY,
                           host=mturk_url)

    conn.expire_hit(hit_id)
    # Give the HIT a moment to expire.
    time.sleep(0.25)
    conn.dispose_hit(hit_id)
    print("HIT " + hit_id + " was deleted!")
def delete_hit(hit_id):
    """Expire a HIT on Mechanical Turk and then dispose of it.

    The endpoint is selected by the module-level ``SANDBOX`` flag and the
    connection is authenticated with ``AWS_ACCESS_KEY`` / ``AWS_SECRET_KEY``.

    Args:
        hit_id: Identifier of the HIT to delete. It is concatenated into the
            confirmation message, so it must be a string.
    """
    if SANDBOX:
        mturk_url = 'mechanicalturk.sandbox.amazonaws.com'
    else:
        mturk_url = 'mechanicalturk.amazonaws.com'

    conn = MTurkConnection(aws_access_key_id=AWS_ACCESS_KEY,
                           aws_secret_access_key=AWS_SECRET_KEY,
                           host=mturk_url)

    conn.expire_hit(hit_id)
    # Give the HIT a moment to expire.
    time.sleep(0.25)
    conn.dispose_hit(hit_id)
    print("HIT " + hit_id + " was deleted!")
class MTurkServices(object):
    ''' MTurk services

    Thin wrapper around a boto ``MTurkConnection``. Most methods reconnect
    via ``connect_to_turk()`` first and return ``False`` when the connection
    cannot be made or the MTurk request fails.
    '''

    def __init__(self, aws_access_key_id, aws_secret_access_key, is_sandbox):
        ''' Store the credentials and sandbox flag, then verify the login '''
        self.update_credentials(aws_access_key_id, aws_secret_access_key)
        self.set_sandbox(is_sandbox)
        self.valid_login = self.verify_aws_login()

        if not self.valid_login:
            print('WARNING *****************************')
            print('Sorry, AWS Credentials invalid.\nYou will only be able to '
                  'test experiments locally until you enter\nvalid '
                  'credentials in the AWS Access section of ~/.psiturkconfig\n')

    def update_credentials(self, aws_access_key_id, aws_secret_access_key):
        ''' Update credentials '''
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def set_sandbox(self, is_sandbox):
        ''' Set sandbox '''
        self.is_sandbox = is_sandbox

    @staticmethod
    def _hit_to_data(hit):
        ''' Wrap the summary fields of a boto HIT record in an MTurkHIT '''
        return MTurkHIT({
            'hitid': hit.HITId,
            'title': hit.Title,
            'status': hit.HITStatus,
            'max_assignments': hit.MaxAssignments,
            'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
            'number_assignments_pending': hit.NumberOfAssignmentsPending,
            'number_assignments_available': hit.NumberOfAssignmentsAvailable,
            'creation_time': hit.CreationTime,
            'expiration': hit.Expiration,
        })

    def get_reviewable_hits(self):
        ''' Get reviewable HITs

        Returns MTurkHIT objects for HITs whose status is "Reviewable" or
        "Reviewing", or False on connection/request failure.
        '''
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        return [self._hit_to_data(hit) for hit in hits
                if hit.HITStatus in ("Reviewable", "Reviewing")]

    def get_all_hits(self):
        ''' Get all HITs

        Returns a list of MTurkHIT objects, or False on connection/request
        failure.
        '''
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        return [self._hit_to_data(hit) for hit in hits]

    def get_active_hits(self):
        ''' Get active HITs

        Returns MTurkHIT objects for HITs whose ``expired`` attribute is
        false, or False on connection/request failure.
        '''
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        return [self._hit_to_data(hit) for hit in hits if not hit.expired]

    def get_workers(self, assignment_status=None):
        ''' Get workers

        Collects the assignments of every HIT, optionally filtered by
        ``assignment_status``, and returns one dict per assignment. Returns
        False on connection/request failure.
        '''
        if not self.connect_to_turk():
            return False

        page_size = 100
        workers = []
        try:
            hit_ids = [hit.HITId for hit in self.mtc.get_all_hits()]
            for hit_id in hit_ids:
                page_number = 1
                page = self.mtc.get_assignments(
                    hit_id,
                    status=assignment_status,
                    sort_by='SubmitTime',
                    page_size=page_size,
                    page_number=page_number
                )
                workers.extend(page)

                # Ceiling division: a partially filled last page still needs
                # its own request.
                total_results = int(page.TotalNumResults)
                total_pages = -(-total_results // page_size)
                while page_number < total_pages:
                    page_number += 1
                    workers.extend(self.mtc.get_assignments(
                        hit_id,
                        status=assignment_status,
                        sort_by='SubmitTime',
                        page_size=page_size,
                        page_number=page_number
                    ))
        except MTurkRequestError:
            return False

        return [{
            'hitId': worker.HITId,
            'assignmentId': worker.AssignmentId,
            'workerId': worker.WorkerId,
            'submit_time': worker.SubmitTime,
            'accept_time': worker.AcceptTime,
            'status': worker.AssignmentStatus
        } for worker in workers]

    def bonus_worker(self, assignment_id, amount, reason=""):
        ''' Bonus worker

        Grants ``amount`` to the worker who owns ``assignment_id``. Returns
        True on success, False otherwise (the request error is printed).
        '''
        if not self.connect_to_turk():
            return False
        try:
            bonus = MTurkConnection.get_price_as_price(amount)
            assignment = self.mtc.get_assignment(assignment_id)[0]
            worker_id = assignment.WorkerId
            self.mtc.grant_bonus(worker_id, assignment_id, bonus, reason)
            return True
        except MTurkRequestError as exception:
            print(exception)
            return False

    def approve_worker(self, assignment_id):
        ''' Approve worker '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def reject_worker(self, assignment_id):
        ''' Reject worker '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def unreject_worker(self, assignment_id):
        ''' Unreject worker '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_rejected_assignment(assignment_id)
            return True
        except MTurkRequestError:
            return False

    def verify_aws_login(self):
        ''' Verify AWS login

        Placeholder credentials are rejected without contacting AWS;
        otherwise the account balance is requested from the production
        endpoint as a credentials check.
        '''
        if ((self.aws_access_key_id == 'YourAccessKeyId') or
                (self.aws_secret_access_key == 'YourSecretAccessKey')):
            return False

        host = 'mechanicalturk.amazonaws.com'
        mturkparams = dict(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=host)
        self.mtc = MTurkConnection(**mturkparams)
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as exception:
            print(exception.error_message)
            return False
        return True

    def connect_to_turk(self):
        ''' Connect to turk

        Rebuilds ``self.mtc`` for the sandbox or production host according
        to ``self.is_sandbox``. Returns False when the login is not valid.
        '''
        if not self.valid_login:
            print('Sorry, unable to connect to Amazon Mechanical Turk. AWS '
                  'credentials invalid.')
            return False
        if self.is_sandbox:
            host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            host = 'mechanicalturk.amazonaws.com'

        mturkparams = dict(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=host)
        self.mtc = MTurkConnection(**mturkparams)
        return True

    def configure_hit(self, hit_config):
        ''' Configure HIT

        Registers a HIT type from ``hit_config`` and stores the keyword
        arguments for ``create_hit`` in ``self.param_dict``. Expects
        ``self.mtc`` to be connected already.
        '''
        # configure question_url based on the id
        experiment_portal_url = hit_config['ad_location']
        frame_height = 600
        mturk_question = ExternalQuestion(experiment_portal_url, frame_height)

        # Qualification:
        quals = Qualifications()
        approve_requirement = hit_config['approve_requirement']
        quals.add(
            PercentAssignmentsApprovedRequirement("GreaterThanOrEqualTo",
                                                  approve_requirement))

        if hit_config['us_only']:
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Create a HIT type for this HIT.
        # NOTE(review): quals is built above but qual_req=None is passed
        # here; the qualifications only reach MTurk through param_dict below.
        # Confirm they are applied when a HIT type id is also supplied.
        hit_type = self.mtc.register_hit_type(
            hit_config['title'],
            hit_config['description'],
            hit_config['reward'],
            hit_config['duration'],
            keywords=hit_config['keywords'],
            approval_delay=None,
            qual_req=None)[0]

        # Check the config file to see if notifications are wanted.
        config = PsiturkConfig()
        config.load_config()

        try:
            url = config.get('Server Parameters', 'notification_url')

            all_event_types = [
                "AssignmentAccepted",
                "AssignmentAbandoned",
                "AssignmentReturned",
                "AssignmentSubmitted",
                "HITReviewable",
                "HITExpired",
            ]

            self.mtc.set_rest_notification(
                hit_type.HITTypeId,
                url,
                event_types=all_event_types)
        except Exception:
            # Notifications are best-effort: a missing notification_url or a
            # failed registration must not stop HIT configuration.
            pass

        # Specify all the HIT parameters
        self.param_dict = dict(
            hit_type=hit_type.HITTypeId,
            question=mturk_question,
            lifetime=hit_config['lifetime'],
            max_assignments=hit_config['max_assignments'],
            title=hit_config['title'],
            description=hit_config['description'],
            keywords=hit_config['keywords'],
            reward=hit_config['reward'],
            duration=hit_config['duration'],
            approval_delay=None,
            questions=None,
            qualifications=quals,
            response_groups=[
                'Minimal',
                'HITDetail',
                'HITQuestion',
                'HITAssignmentSummary'
            ])

    def check_balance(self):
        ''' Check balance

        Returns the first element of the account-balance response, or '-'
        when no connection can be made.
        '''
        if not self.connect_to_turk():
            return '-'
        return self.mtc.get_account_balance()[0]

    # TODO (if valid AWS credentials haven't been provided then
    # connect_to_turk() will fail, not error checking here and elsewhere)
    def create_hit(self, hit_config):
        ''' Create HIT

        Returns the new HIT id (also stored in ``self.hitid``), or False if
        connecting, configuring or creating the HIT fails.
        '''
        try:
            if not self.connect_to_turk():
                return False
            self.configure_hit(hit_config)
            myhit = self.mtc.create_hit(**self.param_dict)[0]
            self.hitid = myhit.HITId
        except Exception:
            return False
        else:
            return self.hitid

    # TODO(Jay): Have a wrapper around functions that serializes them.
    # Default output should not be serialized.
    def expire_hit(self, hitid):
        ''' Expire HIT '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.expire_hit(hitid)
            return True
        except MTurkRequestError:
            print("Failed to expire HIT. Please check the ID and try again.")
            return False

    def dispose_hit(self, hitid):
        ''' Dispose HIT

        Returns True on success and False on failure, mirroring expire_hit.
        '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.dispose_hit(hitid)
            return True
        except Exception:
            print("Failed to dispose of HIT %s. Make sure there are no "
                  "assignments remaining to be reviewed." % hitid)
            return False
#!flask/bin/python
# Script that expires all current HITs released under me as a requester
# (the alternative "disable" step is kept commented out in the loop below).
# Disable means completely delete the HIT.
# Expire means Workers can't view it anymore, but you can still review and
# approve/reject it.
from boto.mturk.connection import MTurkConnection
from secret import SECRET_KEY, ACCESS_KEY, AMAZON_HOST

# Start Configuration Variables
AWS_ACCESS_KEY_ID = ACCESS_KEY
AWS_SECRET_ACCESS_KEY = SECRET_KEY

connection = MTurkConnection(aws_access_key_id=AWS_ACCESS_KEY_ID,
                             aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                             host=AMAZON_HOST)

# Materialised because the list is printed once and then iterated.
hits_lst = list(connection.get_all_hits())
print(hits_lst)

for hit in hits_lst:
    # Single-argument print() emits the same text on Python 2 and 3; the
    # double space matches what the original two-item print statement wrote.
    print("Expiring HIT ID:  %s" % (hit.HITId,))
    connection.expire_hit(hit.HITId)
    # print("Disabling HIT ID:  %s" % (hit.HITId,))
    # connection.disable_hit(hit.HITId)
def check_db_for_missing_notifications():
    """Check the database for missing notifications.

    Every participant whose status is still "working" more than two minutes
    past the configured HIT duration is looked up on MTurk, and the action
    taken depends on the assignment status AWS reports:

    * "Approved" / "Rejected": the participant's status is updated and
      committed.
    * "Submitted": an AssignmentSubmitted notification is re-posted to this
      server's /notifications route.
    * anything else, including a failed AWS lookup: auto-recruit is switched
      off through the Heroku API, the HIT is expired and a
      NotificationMissing notification is posted.

    An email describing the incident is built in the last two cases but is
    not sent (see the note in the loop body).
    """
    aws_access_key_id = os.environ['aws_access_key_id']
    aws_secret_access_key = os.environ['aws_secret_access_key']
    if config.getboolean('Shell Parameters', 'launch_in_sandbox_mode'):
        conn = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host='mechanicalturk.sandbox.amazonaws.com')
    else:
        conn = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key)

    # get all participants that are still marked as working
    participants = Participant.query.filter_by(status="working").all()

    # get current time
    current_time = datetime.now()

    # get experiment duration in seconds (the config value is in hours)
    duration = float(config.get('HIT Configuration', 'duration')) * 60 * 60

    # for each participant, if current_time - start_time > duration + 2 mins
    for p in participants:
        p_time = (current_time - p.creation_time).total_seconds()
        if p_time <= (duration + 120):
            continue

        print("Error: participant {} with status {} has been playing for too "
              "long and no notification has arrived - "
              "running emergency code".format(p.id, p.status))

        # get their assignment
        assignment_id = p.assignment_id

        # ask amazon for the status of the assignment
        try:
            assignment = conn.get_assignment(assignment_id)[0]
            status = assignment.AssignmentStatus
        except Exception:
            # A failed lookup is treated like an unknown status below.
            status = None
        print("assignment status from AWS is {}".format(status))
        hit_id = p.hit_id

        whimsical = os.getenv("whimsical")

        # NOTE: email delivery is disabled because gmail now blocks logins
        # from new locations, so the messages below are built but not sent.
        # To re-enable, restore the sender settings and the SMTP calls:
        # username = os.getenv('dallinger_email_username')
        # fromaddr = username + "@gmail.com"
        # email_password = os.getenv("dallinger_email_key")
        # toaddr = config.get('HIT Configuration', 'contact_email_on_error')
        # server = smtplib.SMTP('smtp.gmail.com:587')
        # server.starttls()
        # server.login(username, email_password)
        # server.sendmail(fromaddr, toaddr, msg.as_string())
        # server.quit()

        if status == "Approved":
            # if its been approved, set the status accordingly
            print("status set to approved")
            p.status = "approved"
            session.commit()
        elif status == "Rejected":
            print("status set to rejected")
            # if its been rejected, set the status accordingly
            p.status = "rejected"
            session.commit()
        elif status == "Submitted":
            # if it has been submitted then resend a submitted notification
            args = {
                'Event.1.EventType': 'AssignmentSubmitted',
                'Event.1.AssignmentId': assignment_id
            }
            requests.post(
                "http://" + os.environ['HOST'] + '/notifications',
                data=args)

            # build the email that would let the researcher know
            msg = _submitted_notification_email(
                whimsical, assignment_id, duration, p_time)

            print("Error - submitted notification for participant {} missed. "
                  "Database automatically corrected, but proceed with caution."
                  .format(p.id))
        else:
            # if it has not been submitted shut everything down
            # first turn off autorecruit
            host = _heroku_app_name(os.environ['HOST'])
            args = json.dumps({"auto_recruit": "false"})
            headers = {
                "Accept": "application/vnd.heroku+json; version=3",
                "Content-Type": "application/json"
            }
            heroku_email_address = os.getenv('heroku_email_address')
            heroku_password = os.getenv('heroku_password')
            requests.patch(
                "https://api.heroku.com/apps/{}/config-vars".format(host),
                data=args,
                auth=(heroku_email_address, heroku_password),
                headers=headers)

            # then force expire the hit via boto
            conn.expire_hit(hit_id)

            # build the email that would let the researcher know
            msg = _abandoned_notification_email(
                whimsical, assignment_id, duration, p_time)

            # send a notificationmissing notification
            args = {
                'Event.1.EventType': 'NotificationMissing',
                'Event.1.AssignmentId': assignment_id
            }
            requests.post(
                "http://" + os.environ['HOST'] + '/notifications',
                data=args)

            print("Error - abandoned/returned notification for participant {} missed. "
                  "Experiment shut down. Please check database and then manually "
                  "resume experiment."
                  .format(p.id))


def _heroku_app_name(host):
    """Return ``host`` without a trailing ".herokuapp.com", if present."""
    suffix = ".herokuapp.com"
    if host.endswith(suffix):
        return host[:-len(suffix)]
    return host


def _submitted_notification_email(whimsical, assignment_id, duration, p_time):
    """Build the email reporting a missed "submitted" notification.

    ``duration`` and ``p_time`` are in seconds; the text reports minutes.
    """
    if whimsical:
        body = (
            "Dearest Friend,\n\nI am writing to let you know that at {}, "
            "during my regular (and thoroughly enjoyable) perousal of the "
            "most charming participant data table, I happened to notice that "
            "assignment {} has been taking longer than we were expecting. I "
            "recall you had suggested {} minutes as an upper limit for what "
            "was an acceptable length of time for each assignement , however "
            "this assignment had been underway for a shocking {} minutes, a "
            "full {} minutes over your allowance. I immediately dispatched a "
            "telegram to our mutual friends at AWS and they were able to "
            "assure me that although the notification had failed to be "
            "correctly processed, the assignment had in fact been completed. "
            "Rather than trouble you, I dealt with this myself and I can "
            "assure you there is no immediate cause for concern. "
            "Nonetheless, for my own peace of mind, I would appreciate you "
            "taking the time to look into this matter at your earliest "
            "convenience.\n\nI remain your faithful and obedient servant, "
            "\nWilliam H. Dallinger\n\n P.S. Please do not respond to this "
            "message, I am busy with other matters."
        ).format(
            datetime.now(),
            assignment_id,
            round(duration/60),
            round(p_time/60),
            round((p_time-duration)/60))
        msg = MIMEText(body)
        msg['Subject'] = "A matter of minor concern."
    else:
        # The template is formatted BEFORE being wrapped: MIMEText objects
        # have no .format() method.
        body = (
            "Dear experimenter,\n\nThis is an automated email from "
            "Dallinger. You are receiving this email because the Dallinger "
            "platform has discovered evidence that a notification from "
            "Amazon Web Services failed to arrive at the server. Dallinger "
            "has automatically contacted AWS and has determined the dropped "
            "notification was a submitted notification (i.e. the participant "
            "has finished the experiment). This is a non-fatal error and so "
            "Dallinger has auto-corrected the problem. Nonetheless you may "
            "wish to check the database.\n\nBest,\nThe Dallinger dev. "
            "team.\n\n Error details:\nAssignment: {} \nAllowed time: {}"
            "\nTime since participant started: {}"
        ).format(
            assignment_id,
            round(duration/60),
            round(p_time/60))
        msg = MIMEText(body)
        msg['Subject'] = "Dallinger automated email - minor error."
    return msg


def _abandoned_notification_email(whimsical, assignment_id, duration, p_time):
    """Build the email reporting a missed abandoned/returned notification.

    ``duration`` and ``p_time`` are in seconds; the text reports minutes.
    """
    if whimsical:
        body = (
            "Dearest Friend,\n\nI am afraid I write to you with most grave "
            "tidings. At {}, during a routine check of the usually most "
            "delightful participant data table, I happened to notice that "
            "assignment {} has been taking longer than we were expecting. I "
            "recall you had suggested {} minutes as an upper limit for what "
            "was an acceptable length of time for each assignment, however "
            "this assignment had been underway for a shocking {} minutes, a "
            "full {} minutes over your allowance. I immediately dispatched a "
            "telegram to our mutual friends at AWS and they infact informed "
            "me that they had already sent us a notification which we must "
            "have failed to process, implying that the assignment had not "
            "been successfully completed. Of course when the seriousness of "
            "this scenario dawned on me I had to depend on my trusting "
            "walking stick for support: without the notification I didn't "
            "know to remove the old assignment's data from the tables and "
            "AWS will have already sent their replacement, meaning that the "
            "tables may already be in a most unsound state!\n\nI am sorry to "
            "trouble you with this, however, I do not know how to proceed so "
            "rather than trying to remedy the scenario myself, I have "
            "instead temporarily ceased operations by expiring the HIT with "
            "the fellows at AWS and have refrained from posting any further "
            "invitations myself. Once you see fit I would be most "
            "appreciative if you could attend to this issue with the "
            "caution, sensitivity and intelligence for which I know you so "
            "well.\n\nI remain your faithful and obedient servant,"
            "\nWilliam H. Dallinger\n\nP.S. Please do not respond to this "
            "message, I am busy with other matters."
        ).format(
            datetime.now(),
            assignment_id,
            round(duration/60),
            round(p_time/60),
            round((p_time-duration)/60))
        msg = MIMEText(body)
        msg['Subject'] = "Most troubling news."
    else:
        # The template is formatted BEFORE being wrapped: MIMEText objects
        # have no .format() method.
        body = (
            "Dear experimenter,\n\nThis is an automated email from "
            "Dallinger. You are receiving this email because the Dallinger "
            "platform has discovered evidence that a notification from "
            "Amazon Web Services failed to arrive at the server. Dallinger "
            "has automatically contacted AWS and has determined the dropped "
            "notification was an abandoned/returned notification (i.e. the "
            "participant had returned the experiment or had run out of "
            "time). This is a serious error and so Dallinger has paused the "
            "experiment - expiring the HIT on MTurk and setting auto_recruit "
            "to false. Participants currently playing will be able to "
            "finish, however no further participants will be recruited until "
            "you do so manually. We strongly suggest you use the details "
            "below to check the database to make sure the missing "
            "notification has not caused additional problems before "
            "resuming.\nIf you are receiving a lot of these emails this "
            "suggests something is wrong with your experiment code.\n\nBest, "
            "\nThe Dallinger dev. team.\n\n Error details:\nAssignment: {} "
            "\nAllowed time: {}\nTime since participant started: {}"
        ).format(
            assignment_id,
            round(duration/60),
            round(p_time/60))
        msg = MIMEText(body)
        msg['Subject'] = "Dallinger automated email - major error."
    return msg
nargs='+', help='additional configuration files') args = parser.parse_args() mturk_cfg_fname = as_project_path('resources/private/mturk.cfg') cfg = Config.load_configs([mturk_cfg_fname] + args.configs, log=False) print "Expire all HITs" conn = MTurkConnection( aws_access_key_id=cfg['MTURK']['aws_access_key_id'], aws_secret_access_key=cfg['MTURK']['aws_secret_access_key'], host=cfg['MTURK']['host']) for pnum in range(1, 50): for hit in conn.search_hits(page_size=100, page_number=pnum): print "HITId:", hit.HITId hitStatus = defaultdict(int) for ass in conn.get_assignments(hit.HITId, status='Submitted', page_size=10, page_number=1): #print "Dir ass:", dir(ass) hitStatus[ass.AssignmentStatus] += 1 print hitStatus print 'Expiring hit:', hit.HITId conn.expire_hit(hit.HITId)
class MTurkProvider(object): description = 'This is a task authored by a requester on Daemo, a research crowdsourcing platform. ' \ 'Mechanical Turk workers are welcome to do it' keywords = ['daemo'] countries = ['US', 'CA'] min_hits = 1000 def __init__(self, host, aws_access_key_id, aws_secret_access_key): self.host = host self.connection = MTurkConnection( aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, host=settings.MTURK_HOST) self.connection.APIVersion = "2014-08-15" if not self.host: raise ValueError("Please provide a host url") def get_connection(self): return self.connection @staticmethod def _mturk_system_qualifications(qualification): requirements = [] for item in qualification.items.all(): if item.expression['attribute'] not in [ 'location', 'approval_rate', 'total_tasks' ]: continue requirement = None if item.expression['attribute'] == 'location': op = OP_IN if item.expression['operator'] == 'in' else OP_NOT_IN requirement = MultiLocaleRequirement(op, [ val.strip() for val in item.expression['value'] if val is not None and val != '' ]) elif item.expression['attribute'] == 'approval_rate': op = OP_GT if item.expression['operator'] == 'gt' else OP_LT requirement = PercentAssignmentsApprovedRequirement( op, item.expression['value']) elif item.expression['attribute'] == 'total_tasks': op = OP_GT if item.expression['operator'] == 'gt' else OP_LT requirement = NumberHitsApprovedRequirement( op, item.expression['value']) requirements.append(requirement) return requirements def get_qualifications(self, project, boomerang_threshold, add_boomerang): requirements = [] if project.qualification is not None: requirements += self._mturk_system_qualifications( project.qualification) boomerang_qual, success = self.create_qualification_type( owner_id=project.owner_id, project_id=project.group_id, name='Boomerang Score #{}'.format(project.group_id), flag=FLAG_Q_BOOMERANG, description='No description available') boomerang = None if 
boomerang_threshold <= int(settings.BOOMERANG_MIDPOINT * 100): for i, bucket in enumerate(WAIT_LIST_BUCKETS): if int(bucket[1] * 100) <= boomerang_threshold: boomerang_blacklist, success = \ self.create_qualification_type(owner_id=project.owner_id, name='Boomerang Waitlist #{}-{}'.format(project.group_id, len( WAIT_LIST_BUCKETS) - i), flag=FLAG_Q_BOOMERANG, description='No description available', deny=True, project_id=project.group_id, bucket=bucket) if success and add_boomerang: boomerang = BoomerangRequirement( qualification_type_id=boomerang_blacklist.type_id, comparator=OP_DNE, integer_value=None) requirements.append(boomerang) else: boomerang = BoomerangRequirement( qualification_type_id=boomerang_qual.type_id, comparator=OP_GTEQ, integer_value=boomerang_threshold) if success and add_boomerang: requirements.append(boomerang) return Qualifications(requirements), boomerang_qual def create_hits(self, project, tasks=None, repetition=None): # if project.min_rating > 0: # return 'NOOP' if not tasks: cursor = connection.cursor() # noinspection SqlResolve query = ''' SELECT max(id) id, repetition, group_id, repetition - sum(existing_assignments) remaining_assignments, min_rating FROM ( SELECT t_rev.id, t.group_id, t.min_rating, p.repetition, CASE WHEN ma.id IS NULL OR ma.status IN (%(skipped)s, %(rejected)s, %(expired)s) THEN 0 ELSE 1 END existing_assignments FROM crowdsourcing_task t INNER JOIN crowdsourcing_project p ON t.project_id = p.id INNER JOIN crowdsourcing_task t_rev ON t_rev.group_id = t.group_id LEFT OUTER JOIN mturk_mturkhit mh ON mh.task_id = t_rev.id LEFT OUTER JOIN mturk_mturkassignment ma ON ma.hit_id = mh.id WHERE t.project_id = (%(project_id)s) AND t_rev.exclude_at IS NULL AND t_rev.deleted_at IS NULL ) t GROUP BY group_id, repetition, min_rating HAVING sum(existing_assignments) < repetition; ''' cursor.execute( query, { 'skipped': TaskWorker.STATUS_SKIPPED, 'rejected': TaskWorker.STATUS_REJECTED, 'expired': TaskWorker.STATUS_EXPIRED, 'project_id': 
project.id }) tasks = cursor.fetchall() rated_workers = Rating.objects.filter( origin_type=Rating.RATING_REQUESTER).count() add_boomerang = rated_workers > 0 duration = project.timeout if project.timeout is not None else datetime.timedelta( hours=24) lifetime = project.deadline - timezone.now( ) if project.deadline is not None else datetime.timedelta(days=7) for task in tasks: question = self.create_external_question(task[0]) mturk_hit = MTurkHIT.objects.filter(task_id=task[0]).first() qualifications, boomerang_qual = self.get_qualifications( project=project, boomerang_threshold=int(round(task[4], 2) * 100), add_boomerang=add_boomerang) qualifications_mask = 0 if qualifications is not None: qualifications_mask = FLAG_Q_LOCALE + FLAG_Q_HITS + FLAG_Q_RATE + FLAG_Q_BOOMERANG hit_type, success = self.create_hit_type( title=project.name, description=self.description, price=project.price, duration=duration, keywords=self.keywords, approval_delay=datetime.timedelta(days=2), qual_req=qualifications, qualifications_mask=qualifications_mask, boomerang_threshold=int(round(task[4], 2) * 100), owner_id=project.owner_id, boomerang_qual=boomerang_qual) if not success: return 'FAILURE' if mturk_hit is None: try: hit = self.connection.create_hit( hit_type=hit_type.string_id, max_assignments=task[3], lifetime=lifetime, question=question)[0] self.set_notification(hit_type_id=hit.HITTypeId) mturk_hit = MTurkHIT(hit_id=hit.HITId, hit_type=hit_type, task_id=task[0]) except MTurkRequestError as e: error = e.errors[0][0] if error == 'AWS.MechanicalTurk.InsufficientFunds': message = { "type": "ERROR", "detail": "Insufficient funds on your Mechanical Turk account!", "code": error } redis_publisher = RedisPublisher(facility='bot', users=[project.owner]) message = RedisMessage(json.dumps(message)) redis_publisher.publish_message(message) return 'FAILED' else: if mturk_hit.hit_type_id != hit_type.id: result, success = self.change_hit_type_of_hit( hit_id=mturk_hit.hit_id, 
hit_type_id=hit_type.string_id) if success: mturk_hit.hit_type = hit_type mturk_hit.save() return 'SUCCESS' def create_hit_type(self, owner_id, title, description, price, duration, boomerang_threshold, keywords=None, approval_delay=None, qual_req=None, qualifications_mask=0, boomerang_qual=None): hit_type = MTurkHITType.objects.filter( owner_id=owner_id, name=title, description=description, price=Decimal(str(price)), duration=duration, qualifications_mask=qualifications_mask, boomerang_threshold=boomerang_threshold).first() if hit_type is not None: return hit_type, True reward = Price(price) try: mturk_ht = self.connection.register_hit_type( title=title, description=description, reward=reward, duration=duration, keywords=keywords, approval_delay=approval_delay, qual_req=qual_req)[0] hit_type = MTurkHITType(owner_id=owner_id, name=title, description=description, price=Decimal(str(price)), keywords=keywords, duration=duration, qualifications_mask=qualifications_mask, boomerang_qualification=boomerang_qual, boomerang_threshold=boomerang_threshold) hit_type.string_id = mturk_ht.HITTypeId hit_type.save() except MTurkRequestError: return None, False return hit_type, True def create_external_question(self, task, frame_height=800): task_hash = Hashids(salt=settings.SECRET_KEY, min_length=settings.ID_HASH_MIN_LENGTH) task_id = task_hash.encode(task) url = self.host + '/mturk/task/?taskId=' + task_id question = ExternalQuestion(external_url=url, frame_height=frame_height) return question def update_max_assignments(self, task): task = Task.objects.get(id=task['id']) mturk_hit = task.mturk_hit if not mturk_hit: raise MTurkHIT.DoesNotExist( "This task is not associated to any mturk hit") assignments_completed = task.task_workers.filter(~Q(status__in=[ TaskWorker.STATUS_REJECTED, TaskWorker.STATUS_SKIPPED, TaskWorker.STATUS_EXPIRED ])).count() remaining_assignments = task.project.repetition - assignments_completed if remaining_assignments > 0 and mturk_hit.num_assignments == 
mturk_hit.mturk_assignments. \ filter(status=TaskWorker.STATUS_SUBMITTED).count() and \ mturk_hit.mturk_assignments.filter(status=TaskWorker.STATUS_IN_PROGRESS).count() == 0: self.add_assignments(hit_id=mturk_hit.hit_id, increment=1) self.extend_hit(hit_id=mturk_hit.hit_id) mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS mturk_hit.num_assignments += 1 mturk_hit.save() elif remaining_assignments == 0: self.expire_hit(hit_id=mturk_hit.hit_id) mturk_hit.status = MTurkHIT.STATUS_EXPIRED mturk_hit.save() elif remaining_assignments > 0 and \ mturk_hit.status == MTurkHIT.STATUS_EXPIRED: self.extend_hit(hit_id=mturk_hit.hit_id) mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS return 'SUCCESS' def get_assignment(self, assignment_id): try: return self.connection.get_assignment(assignment_id)[0], True except MTurkRequestError as e: error = e.errors[0][0] if error == 'AWS.MechanicalTurk.InvalidAssignmentState': return assignment_id, False return None, False def set_notification(self, hit_type_id): self.connection.set_rest_notification( hit_type=hit_type_id, url=self.host + '/api/mturk/notification', event_types=[ 'AssignmentReturned', 'AssignmentAbandoned', 'AssignmentAccepted', 'AssignmentSubmitted' ]) def approve_assignment(self, task_worker): task_worker_obj = TaskWorker.objects.get(id=task_worker['id']) if hasattr(task_worker_obj, 'mturk_assignments' ) and task_worker_obj.mturk_assignments.first() is not None: try: self.connection.approve_assignment( task_worker_obj.mturk_assignments.first().assignment_id) except MTurkRequestError: return False return True def reject_assignment(self, task_worker): task_worker_obj = TaskWorker.objects.get(id=task_worker['id']) if hasattr(task_worker_obj, 'mturk_assignments' ) and task_worker_obj.mturk_assignments.first() is not None: try: self.connection.reject_assignment( task_worker_obj.mturk_assignments.first().assignment_id) except MTurkRequestError: return False return True def expire_hit(self, hit_id): try: 
self.connection.expire_hit(hit_id) except MTurkRequestError: return False return True def disable_hit(self, hit_id): try: self.connection.disable_hit(hit_id) except MTurkRequestError: return False return True def extend_hit(self, hit_id): try: self.connection.extend_hit(hit_id=hit_id, expiration_increment=604800) # 7 days except MTurkRequestError: return False return True def add_assignments(self, hit_id, increment=1): try: self.connection.extend_hit(hit_id=hit_id, assignments_increment=increment) except MTurkRequestError: return False return True def test_connection(self): try: return self.connection.get_account_balance()[0], True except MTurkRequestError as e: error = e.errors[0][0] if error == 'AWS.NotAuthorized': return None, False return None, False def get_account_balance(self): try: return self.connection.get_account_balance()[0] except MTurkRequestError: return None def create_qualification_type(self, owner_id, name, flag, description, project_id, auto_granted=False, auto_granted_value=None, deny=False, bucket=None): # noinspection SqlResolve query = ''' SELECT * FROM ( SELECT task.target_id, task.username, round(task.task_w_avg::NUMERIC, 2) rating --round(coalesce(task.task_w_avg, requester.requester_w_avg, -- platform.platform_w_avg)::NUMERIC, 2) rating FROM ( SELECT target_id, origin_id, project_id, username, sum(weight * power((%(BOOMERANG_TASK_ALPHA)s), t.row_number)) / sum(power((%(BOOMERANG_TASK_ALPHA)s), t.row_number)) task_w_avg FROM ( SELECT r.id, r.origin_id, p.group_id project_id, weight, r.target_id, -1 + row_number() OVER (PARTITION BY target_id ORDER BY tw.created_at DESC) AS row_number, u.username username FROM crowdsourcing_rating r INNER JOIN crowdsourcing_task t ON t.id = r.task_id INNER JOIN crowdsourcing_project p ON p.id = t.project_id INNER JOIN crowdsourcing_taskworker tw ON t.id = tw.task_id AND tw.worker_id=r.target_id INNER JOIN auth_user u ON u.id = r.target_id WHERE origin_id = (%(origin_id)s) AND origin_type = 
(%(origin_type)s)) t GROUP BY origin_id, target_id, project_id, username) task WHERE task.project_id = (%(project_id)s) ) r ''' extra_query = 'WHERE rating BETWEEN (%(lower_bound)s) AND (%(upper_bound)s);' params = { 'origin_type': Rating.RATING_REQUESTER, 'origin_id': owner_id, 'project_id': project_id, 'BOOMERANG_REQUESTER_ALPHA': settings.BOOMERANG_REQUESTER_ALPHA, 'BOOMERANG_PLATFORM_ALPHA': settings.BOOMERANG_PLATFORM_ALPHA, 'BOOMERANG_TASK_ALPHA': settings.BOOMERANG_TASK_ALPHA } obj_params = {'upper_bound': 300, 'lower_bound': 100} if deny and bucket is not None: query += extra_query params.update({'upper_bound': bucket[1], 'lower_bound': bucket[0]}) obj_params.update({ 'upper_bound': bucket[1] * 100, 'lower_bound': bucket[0] * 100, 'is_blacklist': True }) cursor = connection.cursor() cursor.execute(query, params=params) worker_ratings_raw = cursor.fetchall() worker_ratings = [{ "worker_id": r[0], "worker_username": r[1], "rating": r[2] } for r in worker_ratings_raw] qualification = MTurkQualification.objects.filter(owner_id=owner_id, flag=flag, name=name).first() assigned_workers = [] if qualification is None: try: qualification_type = self.connection. 
\ create_qualification_type(name=name, description=description, status='Active', auto_granted=auto_granted, auto_granted_value=auto_granted_value)[0] qualification = MTurkQualification.objects.create( owner_id=owner_id, flag=flag, name=name, description=description, auto_granted=auto_granted, auto_granted_value=auto_granted_value, type_id=qualification_type.QualificationTypeId, **obj_params) except MTurkRequestError: return None, False else: assigned_workers = MTurkWorkerQualification.objects.values( 'worker').filter(qualification=qualification).values_list( 'worker', flat=True) for rating in worker_ratings: user_name = rating["worker_username"].split('.') if len(user_name) == 2 and user_name[0] == 'mturk': mturk_worker_id = user_name[1].upper() if mturk_worker_id not in assigned_workers: self.assign_qualification( qualification_type_id=qualification.type_id, worker_id=mturk_worker_id, value=int(rating['rating'] * 100)) defaults = { 'qualification': qualification, 'worker': mturk_worker_id, 'score': int(rating['rating'] * 100) } MTurkWorkerQualification.objects.update_or_create( qualification=qualification, worker=mturk_worker_id, defaults=defaults) return qualification, True def change_hit_type_of_hit(self, hit_id, hit_type_id): try: result = self.connection.change_hit_type_of_hit( hit_id=hit_id, hit_type=hit_type_id) except MTurkRequestError: return None, False return result, True def update_worker_boomerang(self, project_id, worker_id, task_avg, requester_avg): """ Update boomerang for project Args: project_id: worker_id: task_avg: requester_avg Returns: str """ hit = MTurkHIT.objects.select_related( 'hit_type__boomerang_qualification').filter( task__project__group_id=project_id).first() if hit is not None: qualification = hit.hit_type.boomerang_qualification worker_qual = MTurkWorkerQualification.objects.filter( qualification=qualification, worker=worker_id).first() if worker_qual is not None: self.update_score(worker_qual, score=int(task_avg * 100), 
override=True) else: MTurkWorkerQualification.objects.create( qualification=qualification, worker=worker_id, score=int(task_avg * 100), overwritten=True) self.assign_qualification( qualification_type_id=qualification.type_id, worker_id=worker_id, value=int(task_avg * 100)) # other_quals = MTurkWorkerQualification.objects.filter(~Q(qualification=qualification), # worker=worker_id, # overwritten=False) # for q in other_quals: # self.update_score(q, score=int(requester_avg * 100)) return 'SUCCESS' def update_score(self, worker_qual, score, override=False): if worker_qual is None: return False try: self.connection.update_qualification_score( worker_qual.qualification.type_id, worker_qual.worker, score) worker_qual.overwritten = override worker_qual.score = score worker_qual.save() except MTurkRequestError: return False return True def assign_qualification(self, qualification_type_id, worker_id, value=1): """ Revoke a qualification from a WorkerId Args: qualification_type_id: worker_id: value Returns: bool """ try: self.connection.assign_qualification(qualification_type_id, worker_id, value, send_notification=False) return True except MTurkRequestError: return False def revoke_qualification(self, qualification_type_id, worker_id): try: self.connection.revoke_qualification( qualification_type_id=qualification_type_id, subject_id=worker_id) return True except MTurkRequestError: return False def notify_workers(self, worker_ids, subject, message_text): try: self.connection.notify_workers(worker_ids, subject, message_text) return True except MTurkRequestError: return False
class MTurkServices:
    """Convenience wrapper around a boto ``MTurkConnection``.

    Credentials are validated once at construction time and the outcome is
    stored in ``self.validLogin``.  Every operation calls
    ``connect_to_turk()`` first, which refuses to connect when that flag is
    false, so most methods return ``False`` in that case (``check_balance``
    returns ``'-'``).
    """

    def __init__(self, aws_access_key_id, aws_secret_access_key, is_sandbox):
        """Store credentials and the sandbox flag, then validate the login.

        Prints a warning when ``verify_aws_login()`` reports failure.
        """
        self.update_credentials(aws_access_key_id, aws_secret_access_key)
        self.set_sandbox(is_sandbox)
        self.validLogin = self.verify_aws_login()
        if not self.validLogin:
            # Single-argument parenthesized print behaves identically as a
            # Python 2 statement and as a Python 3 function call.
            print('WARNING *****************************')
            print('Sorry, AWS Credentials invalid.\nYou will only be able to '
                  'test experiments locally until you enter\nvalid '
                  'credentials in the AWS Access section of ~/.psiturkconfig\n')

    def update_credentials(self, aws_access_key_id, aws_secret_access_key):
        """Replace the stored AWS key pair (does not re-validate it)."""
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def set_sandbox(self, is_sandbox):
        """Select the sandbox (truthy) or production (falsy) endpoint."""
        self.is_sandbox = is_sandbox

    @staticmethod
    def _hits_to_data(hits):
        """Wrap each boto HIT record in an ``MTurkHIT`` built from its fields."""
        return [MTurkHIT({
            'hitid': hit.HITId,
            'title': hit.Title,
            'status': hit.HITStatus,
            'max_assignments': hit.MaxAssignments,
            'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
            'number_assignments_pending': hit.NumberOfAssignmentsPending,
            'number_assignments_available': hit.NumberOfAssignmentsAvailable,
            'creation_time': hit.CreationTime,
            'expiration': hit.Expiration,
        }) for hit in hits]

    def get_reviewable_hits(self):
        """Return HITs whose status is "Reviewable" or "Reviewing".

        Returns ``False`` when not connected or when the request fails.
        """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        reviewable_hits = [hit for hit in hits
                           if hit.HITStatus in ("Reviewable", "Reviewing")]
        return self._hits_to_data(reviewable_hits)

    def get_all_hits(self):
        """Return every HIT, or ``False`` when not connected / on failure."""
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        return self._hits_to_data(hits)

    def get_active_hits(self):
        """Return HITs whose ``expired`` attribute is falsy.

        Returns ``False`` when not connected or when the request fails.
        """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        return self._hits_to_data([hit for hit in hits if not hit.expired])

    def get_workers(self, assignmentStatus=None):
        """Return assignment records for the HITs returned by one search.

        Only a single ``search_hits`` page (20 HITs, descending order) and a
        single ``get_assignments`` page (100 entries) per HIT are requested.
        Returns a list of dicts, or ``False`` when not connected / on failure.
        """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.search_hits(sort_direction='Descending',
                                        page_size=20)
            workers = []
            for hit in hits:
                workers.extend(self.mtc.get_assignments(
                    hit.HITId,
                    status=assignmentStatus,
                    sort_by='SubmitTime',
                    page_size=100))
        except MTurkRequestError:
            return False
        return [{
            'hitId': worker.HITId,
            'assignmentId': worker.AssignmentId,
            'workerId': worker.WorkerId,
            'submit_time': worker.SubmitTime,
            'accept_time': worker.AcceptTime,
            'status': worker.AssignmentStatus
        } for worker in workers]

    def bonus_worker(self, assignment_id, amount, reason=""):
        """Grant ``amount`` as a bonus to the worker of ``assignment_id``.

        Returns ``True`` on success; prints the error and returns ``False``
        on a request failure, ``False`` when not connected.
        """
        if not self.connect_to_turk():
            return False
        try:
            bonus = MTurkConnection.get_price_as_price(amount)
            assignment = self.mtc.get_assignment(assignment_id)[0]
            self.mtc.grant_bonus(assignment.WorkerId, assignment_id, bonus,
                                 reason)
            return True
        except MTurkRequestError as e:
            print(e)
            return False

    def approve_worker(self, assignment_id):
        """Approve an assignment; ``True`` on success, ``False`` otherwise."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def reject_worker(self, assignment_id):
        """Reject an assignment; ``True`` on success, ``False`` otherwise."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def unreject_worker(self, assignment_id):
        """Approve a previously rejected assignment; ``True`` on success."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_rejected_assignment(assignment_id)
            return True
        except MTurkRequestError:
            return False

    def verify_aws_login(self):
        """Check the stored credentials by requesting the account balance.

        Placeholder keys are rejected without any request.  The check always
        uses the production host, regardless of ``is_sandbox``.
        """
        if (self.aws_access_key_id == 'YourAccessKeyId'
                or self.aws_secret_access_key == 'YourSecretAccessKey'):
            return False
        self.mtc = MTurkConnection(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host='mechanicalturk.amazonaws.com')
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as e:
            print(e.error_message)
            return False
        return True

    def connect_to_turk(self):
        """(Re)create ``self.mtc`` for the selected endpoint.

        Returns ``False`` (after printing a message) when the credentials
        were not validated, ``True`` otherwise.
        """
        if not self.validLogin:
            print('Sorry, unable to connect to Amazon Mechanical Turk. AWS credentials invalid.')
            return False
        if self.is_sandbox:
            host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            host = 'mechanicalturk.amazonaws.com'
        self.mtc = MTurkConnection(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=host)
        return True

    def configure_hit(self, hit_config):
        """Build ``self.paramdict``, the keyword arguments for ``create_hit``.

        Reads the keys 'ad_location', 'approve_requirement', 'us_only',
        'lifetime', 'max_assignments', 'title', 'description', 'keywords',
        'reward' and 'duration' from ``hit_config``.
        """
        # configure question_url based on the id
        experimentPortalURL = hit_config['ad_location']
        frameheight = 600
        mturkQuestion = ExternalQuestion(experimentPortalURL, frameheight)

        # Qualification:
        quals = Qualifications()
        quals.add(PercentAssignmentsApprovedRequirement(
            "GreaterThanOrEqualTo", hit_config['approve_requirement']))
        if hit_config['us_only']:
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Specify all the HIT parameters
        self.paramdict = dict(
            hit_type=None,
            question=mturkQuestion,
            lifetime=hit_config['lifetime'],
            max_assignments=hit_config['max_assignments'],
            title=hit_config['title'],
            description=hit_config['description'],
            keywords=hit_config['keywords'],
            reward=hit_config['reward'],
            duration=hit_config['duration'],
            approval_delay=None,
            questions=None,
            qualifications=quals)

    def check_balance(self):
        """Return the account balance, or ``'-'`` when not connected."""
        if not self.connect_to_turk():
            return '-'
        return self.mtc.get_account_balance()[0]

    # TODO (if valid AWS credentials haven't been provided then
    # connect_to_turk() will fail, not error checking here and elsewhere)
    def create_hit(self, hit_config):
        """Create a HIT from ``hit_config`` and return its id.

        The id is also stored in ``self.hitid``.  Any ordinary exception is
        reported and turned into a ``False`` return; interpreter-exit
        signals (``KeyboardInterrupt``, ``SystemExit``) are no longer
        swallowed.
        """
        try:
            if not self.connect_to_turk():
                return False
            self.configure_hit(hit_config)
            myhit = self.mtc.create_hit(**self.paramdict)[0]
            self.hitid = myhit.HITId
        except Exception as exc:
            print('Failed to create HIT: %s' % (exc,))
            return False
        return self.hitid

    # TODO(Jay): Have a wrapper around functions that serializes them.
    # Default output should not be serialized.
    def expire_hit(self, hitid):
        """Expire a HIT; ``True`` on success, ``False`` otherwise."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.expire_hit(hitid)
            return True
        except MTurkRequestError:
            print("Failed to expire HIT. Please check the ID and try again.")
            return False

    def dispose_hit(self, hitid):
        """Dispose of a HIT; ``True`` on success, ``False`` otherwise.

        Mirrors ``expire_hit`` so callers can tell success from failure.
        """
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.dispose_hit(hitid)
        except Exception:
            print('Failed to dispose of HIT %s. Make sure there are no assignments remaining to be reviewed' % hitid)
            return False
        return True
mainly information that you do not want to share. """) parser.add_argument('-c', "--configs", nargs='+', help='additional configuration files') args = parser.parse_args() mturk_cfg_fname = as_project_path('resources/private/mturk.cfg') cfg = Config.load_configs([mturk_cfg_fname] + args.configs, log=False) print "Expire all HITs" conn = MTurkConnection(aws_access_key_id = cfg['MTURK']['aws_access_key_id'], aws_secret_access_key = cfg['MTURK']['aws_secret_access_key'], host = cfg['MTURK']['host']) for pnum in range(1, 50): for hit in conn.search_hits(page_size=100, page_number=pnum): print "HITId:", hit.HITId hitStatus = defaultdict(int) for ass in conn.get_assignments(hit.HITId, status='Submitted', page_size=10, page_number=1): #print "Dir ass:", dir(ass) hitStatus[ass.AssignmentStatus] += 1 print hitStatus print 'Expiring hit:', hit.HITId conn.expire_hit(hit.HITId)
class MTurkServices(object):
    ''' MTurk services: convenience wrapper around a boto MTurkConnection.

    Credentials are validated once at construction time and the outcome is
    stored in ``self.valid_login``.  Every operation calls
    ``connect_to_turk()`` first, which refuses to connect when that flag is
    false, so most methods return ``False`` in that case (``check_balance``
    returns ``'-'``).
    '''

    def __init__(self, aws_access_key_id, aws_secret_access_key, is_sandbox):
        ''' Store credentials and sandbox flag, then validate the login. '''
        self.update_credentials(aws_access_key_id, aws_secret_access_key)
        self.set_sandbox(is_sandbox)
        self.valid_login = self.verify_aws_login()
        if not self.valid_login:
            # Single-argument parenthesized print behaves identically as a
            # Python 2 statement and as a Python 3 function call.
            print('WARNING *****************************')
            print('Sorry, AWS Credentials invalid.\nYou will only be able to '
                  'test experiments locally until you enter\nvalid '
                  'credentials in the AWS Access section of ~/.psiturkconfig\n')

    def update_credentials(self, aws_access_key_id, aws_secret_access_key):
        ''' Replace the stored AWS key pair (does not re-validate it). '''
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def set_sandbox(self, is_sandbox):
        ''' Select the sandbox (truthy) or production (falsy) endpoint. '''
        self.is_sandbox = is_sandbox

    @staticmethod
    def _hits_to_data(hits):
        ''' Wrap each boto HIT record in an MTurkHIT built from its fields. '''
        return [MTurkHIT({
            'hitid': hit.HITId,
            'title': hit.Title,
            'status': hit.HITStatus,
            'max_assignments': hit.MaxAssignments,
            'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
            'number_assignments_pending': hit.NumberOfAssignmentsPending,
            'number_assignments_available': hit.NumberOfAssignmentsAvailable,
            'creation_time': hit.CreationTime,
            'expiration': hit.Expiration,
        }) for hit in hits]

    @staticmethod
    def _assignment_to_dict(worker):
        ''' Copy the reported fields of a boto assignment into a dict. '''
        return {
            'hitId': worker.HITId,
            'assignmentId': worker.AssignmentId,
            'workerId': worker.WorkerId,
            'submit_time': worker.SubmitTime,
            'accept_time': worker.AcceptTime,
            'status': worker.AssignmentStatus
        }

    def get_reviewable_hits(self):
        ''' Get HITs whose status is "Reviewable" or "Reviewing".

        Returns False when not connected or when the request fails.
        '''
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        reviewable_hits = [hit for hit in hits
                           if hit.HITStatus in ("Reviewable", "Reviewing")]
        return self._hits_to_data(reviewable_hits)

    def get_all_hits(self):
        ''' Get all HITs, or False when not connected / on failure. '''
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        return self._hits_to_data(hits)

    def get_active_hits(self):
        ''' Get HITs whose ``expired`` attribute is falsy.

        Returns False when not connected or when the request fails.
        '''
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        return self._hits_to_data([hit for hit in hits if not hit.expired])

    def get_workers(self, assignment_status=None, chosen_hit=None):
        ''' Get assignment records for one HIT or for every HIT.

        When ``chosen_hit`` is truthy only that HIT id is queried; otherwise
        all HITs are.  Assignments are fetched 100 per page and every page
        reported by ``TotalNumResults`` is retrieved.  Returns a list of
        dicts, or False when not connected or when a request fails.
        '''
        if not self.connect_to_turk():
            return False
        page_size = 100
        workers = []
        try:
            if chosen_hit:
                hit_ids = [chosen_hit]
            else:
                hit_ids = [hit.HITId for hit in self.mtc.get_all_hits()]
            for hit_id in hit_ids:
                page_number = 1
                first_page = self.mtc.get_assignments(
                    hit_id,
                    status=assignment_status,
                    sort_by='SubmitTime',
                    page_size=page_size,
                    page_number=page_number)
                workers.extend(first_page)
                total_results = int(first_page.TotalNumResults)
                # Ceiling division: a partial last page still counts.
                total_pages = -(-total_results // page_size)
                while page_number < total_pages:
                    page_number += 1
                    workers.extend(self.mtc.get_assignments(
                        hit_id,
                        status=assignment_status,
                        sort_by='SubmitTime',
                        page_size=page_size,
                        page_number=page_number))
        except MTurkRequestError:
            return False
        return [self._assignment_to_dict(worker) for worker in workers]

    def get_worker(self, assignment_id):
        ''' Get the record of one assignment as a single-element list.

        Returns False when not connected or when the request fails.
        '''
        if not self.connect_to_turk():
            return False
        try:
            worker = self.mtc.get_assignment(assignment_id)[0]
        except MTurkRequestError:
            return False
        return [self._assignment_to_dict(worker)]

    def bonus_worker(self, assignment_id, amount, reason=""):
        ''' Bonus the worker of ``assignment_id`` with ``amount``.

        Returns True on success; prints the error and returns False on a
        request failure, False when not connected.
        '''
        if not self.connect_to_turk():
            return False
        try:
            bonus = MTurkConnection.get_price_as_price(amount)
            assignment = self.mtc.get_assignment(assignment_id)[0]
            self.mtc.grant_bonus(assignment.WorkerId, assignment_id, bonus,
                                 reason)
            return True
        except MTurkRequestError as exception:
            print(exception)
            return False

    def approve_worker(self, assignment_id):
        ''' Approve an assignment; True on success, False otherwise. '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def reject_worker(self, assignment_id):
        ''' Reject an assignment; True on success, False otherwise. '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def unreject_worker(self, assignment_id):
        ''' Approve a previously rejected assignment; True on success. '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_rejected_assignment(assignment_id)
            return True
        except MTurkRequestError:
            return False

    def verify_aws_login(self):
        ''' Verify AWS login by requesting the account balance.

        Placeholder keys are rejected without any request.  The check always
        uses the production host, regardless of ``is_sandbox``.
        '''
        if (self.aws_access_key_id == 'YourAccessKeyId'
                or self.aws_secret_access_key == 'YourSecretAccessKey'):
            return False
        self.mtc = MTurkConnection(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host='mechanicalturk.amazonaws.com')
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as exception:
            print(exception.error_message)
            return False
        return True

    def connect_to_turk(self):
        ''' (Re)create ``self.mtc`` for the selected endpoint.

        Returns False (after printing a message) when the credentials were
        not validated, True otherwise.
        '''
        if not self.valid_login:
            print('Sorry, unable to connect to Amazon Mechanical Turk. AWS '
                  'credentials invalid.')
            return False
        if self.is_sandbox:
            host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            host = 'mechanicalturk.amazonaws.com'
        self.mtc = MTurkConnection(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=host)
        return True

    def configure_hit(self, hit_config):
        ''' Register a HIT type and build ``self.param_dict`` for create_hit.

        Reads the keys 'ad_location', 'approve_requirement',
        'number_hits_approved', 'require_master_workers', 'us_only', 'title',
        'description', 'reward', 'duration', 'keywords', 'lifetime' and
        'max_assignments' from ``hit_config``.  Requires ``self.mtc`` to be
        connected already.
        '''
        # configure question_url based on the id
        experiment_portal_url = hit_config['ad_location']
        frame_height = 600
        mturk_question = ExternalQuestion(experiment_portal_url, frame_height)

        # Qualification:
        quals = Qualifications()
        quals.add(PercentAssignmentsApprovedRequirement(
            "GreaterThanOrEqualTo", hit_config['approve_requirement']))
        quals.add(NumberHitsApprovedRequirement(
            "GreaterThanOrEqualTo", hit_config['number_hits_approved']))
        if hit_config['require_master_workers']:
            quals.add(MasterRequirement(sandbox=self.is_sandbox))
        if hit_config['us_only']:
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Create a HIT type for this HIT.
        hit_type = self.mtc.register_hit_type(
            hit_config['title'],
            hit_config['description'],
            hit_config['reward'],
            hit_config['duration'],
            keywords=hit_config['keywords'],
            approval_delay=None,
            qual_req=quals)[0]

        # Check the config file to see if notifications are wanted.
        config = PsiturkConfig()
        config.load_config()
        try:
            url = config.get('Server Parameters', 'notification_url')
            all_event_types = [
                "AssignmentAccepted",
                "AssignmentAbandoned",
                "AssignmentReturned",
                "AssignmentSubmitted",
                "HITReviewable",
                "HITExpired",
            ]
            self.mtc.set_rest_notification(
                hit_type.HITTypeId, url, event_types=all_event_types)
        except Exception:
            # Notifications are optional and deliberately best-effort: a
            # failure here is ignored.  ``Exception`` (rather than a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            pass

        # Specify all the HIT parameters
        self.param_dict = dict(
            hit_type=hit_type.HITTypeId,
            question=mturk_question,
            lifetime=hit_config['lifetime'],
            max_assignments=hit_config['max_assignments'],
            questions=None,
            response_groups=[
                'Minimal',
                'HITDetail',
                'HITQuestion',
                'HITAssignmentSummary'
            ])

    def check_balance(self):
        ''' Check balance; returns '-' when not connected. '''
        if not self.connect_to_turk():
            return '-'
        return self.mtc.get_account_balance()[0]

    # TODO (if valid AWS credentials haven't been provided then
    # connect_to_turk() will fail, not error checking here and elsewhere)
    def create_hit(self, hit_config):
        ''' Create HIT and return its id (also stored in ``self.hitid``).

        Returns False when not connected; prints the error and returns
        False when MTurk rejects a request.
        '''
        try:
            if not self.connect_to_turk():
                return False
            self.configure_hit(hit_config)
            myhit = self.mtc.create_hit(**self.param_dict)[0]
            self.hitid = myhit.HITId
        except MTurkRequestError as e:
            print(e)
            return False
        return self.hitid

    # TODO(Jay): Have a wrapper around functions that serializes them.
    # Default output should not be serialized.
    def expire_hit(self, hitid):
        ''' Expire HIT; True on success, False otherwise. '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.expire_hit(hitid)
            return True
        except MTurkRequestError:
            print("Failed to expire HIT. Please check the ID and try again.")
            return False

    def dispose_hit(self, hitid):
        ''' Dispose HIT; True on success, False otherwise.

        Mirrors ``expire_hit`` so callers can tell success from failure.
        '''
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.dispose_hit(hitid)
        except Exception:
            print("Failed to dispose of HIT %s. Make sure there are no "
                  "assignments remaining to be reviewed." % hitid)
            return False
        return True
class MTurkServices:
    """Convenience wrapper around a boto ``MTurkConnection``.

    Credentials are validated once at construction time and the outcome is
    stored in ``self.validLogin``.  Every operation calls
    ``connect_to_turk()`` first, which refuses to connect when that flag is
    false, so most methods return ``False`` in that case (``check_balance``
    returns ``'-'``).
    """

    def __init__(self, aws_access_key_id, aws_secret_access_key, is_sandbox):
        """Store credentials and the sandbox flag, then validate the login.

        Prints a warning when ``verify_aws_login()`` reports failure.
        """
        self.update_credentials(aws_access_key_id, aws_secret_access_key)
        self.set_sandbox(is_sandbox)
        self.validLogin = self.verify_aws_login()
        if not self.validLogin:
            # Single-argument parenthesized print behaves identically as a
            # Python 2 statement and as a Python 3 function call.
            print('WARNING *****************************')
            print('Sorry, AWS Credentials invalid.\nYou will only be able to '
                  'test experiments locally until you enter\nvalid '
                  'credentials in the AWS Access section of ~/.psiturkconfig\n')

    def update_credentials(self, aws_access_key_id, aws_secret_access_key):
        """Replace the stored AWS key pair (does not re-validate it)."""
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def set_sandbox(self, is_sandbox):
        """Select the sandbox (truthy) or production (falsy) endpoint."""
        self.is_sandbox = is_sandbox

    @staticmethod
    def _hits_to_data(hits):
        """Wrap each boto HIT record in an ``MTurkHIT`` built from its fields."""
        return [MTurkHIT({
            'hitid': hit.HITId,
            'title': hit.Title,
            'status': hit.HITStatus,
            'max_assignments': hit.MaxAssignments,
            'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
            'number_assignments_pending': hit.NumberOfAssignmentsPending,
            'number_assignments_available': hit.NumberOfAssignmentsAvailable,
            'creation_time': hit.CreationTime,
            'expiration': hit.Expiration,
        }) for hit in hits]

    def get_reviewable_hits(self):
        """Return HITs whose status is "Reviewable" or "Reviewing".

        Returns ``False`` when not connected or when the request fails.
        """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        reviewable_hits = [hit for hit in hits
                           if hit.HITStatus in ("Reviewable", "Reviewing")]
        return self._hits_to_data(reviewable_hits)

    def get_all_hits(self):
        """Return every HIT, or ``False`` when not connected / on failure."""
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        return self._hits_to_data(hits)

    def get_active_hits(self):
        """Return HITs whose ``expired`` attribute is falsy.

        Returns ``False`` when not connected or when the request fails.
        """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        return self._hits_to_data([hit for hit in hits if not hit.expired])

    def get_workers(self, assignmentStatus=None):
        """Return assignment records for the HITs returned by one search.

        Only a single ``search_hits`` page (20 HITs, descending order) and a
        single ``get_assignments`` page (100 entries) per HIT are requested.
        Returns a list of dicts, or ``False`` when not connected / on failure.
        """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.search_hits(sort_direction='Descending',
                                        page_size=20)
            workers = []
            for hit in hits:
                workers.extend(self.mtc.get_assignments(
                    hit.HITId,
                    status=assignmentStatus,
                    sort_by='SubmitTime',
                    page_size=100))
        except MTurkRequestError:
            return False
        return [{
            'hitId': worker.HITId,
            'assignmentId': worker.AssignmentId,
            'workerId': worker.WorkerId,
            'submit_time': worker.SubmitTime,
            'accept_time': worker.AcceptTime,
            'status': worker.AssignmentStatus
        } for worker in workers]

    def bonus_worker(self, assignment_id, amount, reason=""):
        """Grant ``amount`` as a bonus to the worker of ``assignment_id``.

        Returns ``True`` on success; prints the error and returns ``False``
        on a request failure, ``False`` when not connected.
        """
        if not self.connect_to_turk():
            return False
        try:
            bonus = MTurkConnection.get_price_as_price(amount)
            assignment = self.mtc.get_assignment(assignment_id)[0]
            self.mtc.grant_bonus(assignment.WorkerId, assignment_id, bonus,
                                 reason)
            return True
        except MTurkRequestError as e:
            print(e)
            return False

    def approve_worker(self, assignment_id):
        """Approve an assignment; ``True`` on success, ``False`` otherwise."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def reject_worker(self, assignment_id):
        """Reject an assignment; ``True`` on success, ``False`` otherwise."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError:
            return False

    def unreject_worker(self, assignment_id):
        """Approve a previously rejected assignment; ``True`` on success."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.approve_rejected_assignment(assignment_id)
            return True
        except MTurkRequestError:
            return False

    def verify_aws_login(self):
        """Check the stored credentials by requesting the account balance.

        Placeholder keys are rejected without any request.  The check always
        uses the production host, regardless of ``is_sandbox``.
        """
        if (self.aws_access_key_id == 'YourAccessKeyId'
                or self.aws_secret_access_key == 'YourSecretAccessKey'):
            return False
        self.mtc = MTurkConnection(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host='mechanicalturk.amazonaws.com')
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as e:
            print(e.error_message)
            return False
        return True

    def connect_to_turk(self):
        """(Re)create ``self.mtc`` for the selected endpoint.

        Returns ``False`` (after printing a message) when the credentials
        were not validated, ``True`` otherwise.
        """
        if not self.validLogin:
            print('Sorry, unable to connect to Amazon Mechanical Turk. AWS credentials invalid.')
            return False
        if self.is_sandbox:
            host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            host = 'mechanicalturk.amazonaws.com'
        self.mtc = MTurkConnection(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            host=host)
        return True

    def configure_hit(self, hit_config):
        """Build ``self.paramdict``, the keyword arguments for ``create_hit``.

        Reads the keys 'ad_location', 'approve_requirement', 'us_only',
        'lifetime', 'max_assignments', 'title', 'description', 'keywords',
        'reward' and 'duration' from ``hit_config``.
        """
        # configure question_url based on the id
        experimentPortalURL = hit_config['ad_location']
        frameheight = 600
        mturkQuestion = ExternalQuestion(experimentPortalURL, frameheight)

        # Qualification:
        quals = Qualifications()
        quals.add(PercentAssignmentsApprovedRequirement(
            "GreaterThanOrEqualTo", hit_config['approve_requirement']))
        if hit_config['us_only']:
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Specify all the HIT parameters
        self.paramdict = dict(
            hit_type=None,
            question=mturkQuestion,
            lifetime=hit_config['lifetime'],
            max_assignments=hit_config['max_assignments'],
            title=hit_config['title'],
            description=hit_config['description'],
            keywords=hit_config['keywords'],
            reward=hit_config['reward'],
            duration=hit_config['duration'],
            approval_delay=None,
            questions=None,
            qualifications=quals)

    def check_balance(self):
        """Return the account balance, or ``'-'`` when not connected."""
        if not self.connect_to_turk():
            return '-'
        return self.mtc.get_account_balance()[0]

    # TODO (if valid AWS credentials haven't been provided then
    # connect_to_turk() will fail, not error checking here and elsewhere)
    def create_hit(self, hit_config):
        """Create a HIT from ``hit_config`` and return its id.

        The id is also stored in ``self.hitid``.  Any ordinary exception is
        reported and turned into a ``False`` return; interpreter-exit
        signals (``KeyboardInterrupt``, ``SystemExit``) are no longer
        swallowed.
        """
        try:
            if not self.connect_to_turk():
                return False
            self.configure_hit(hit_config)
            myhit = self.mtc.create_hit(**self.paramdict)[0]
            self.hitid = myhit.HITId
        except Exception as exc:
            print('Failed to create HIT: %s' % (exc,))
            return False
        return self.hitid

    # TODO(Jay): Have a wrapper around functions that serializes them.
    # Default output should not be serialized.
    def expire_hit(self, hitid):
        """Expire a HIT; ``True`` on success, ``False`` otherwise."""
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.expire_hit(hitid)
            return True
        except MTurkRequestError:
            print("Failed to expire HIT. Please check the ID and try again.")
            return False

    def dispose_hit(self, hitid):
        """Dispose of a HIT; ``True`` on success, ``False`` otherwise.

        Mirrors ``expire_hit`` so callers can tell success from failure.
        """
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.dispose_hit(hitid)
        except Exception:
            print('Failed to dispose of HIT %s. Make sure there are no assignments remaining to be reviewed' % hitid)
            return False
        return True
class HaCRSTurker:
    """Pushes tasklets to Mechanical Turk as HITs and queries their state.

    Holds the parsed configuration (``self.config``), a boto connection
    (``self.MTconnection``) and a database handle (``self.db``).
    """

    def __init__(self):
        """Load ``../config.ini`` and open the MTurk and database handles."""
        self.config = HaCRSUtil.get_config('../config.ini')
        self.MTconnection = MTurkConnection(
            aws_access_key_id=self.config.get('mturk', 'access_key_id'),
            aws_secret_access_key=self.config.get('mturk', 'secret_access_key'),
            host=self.config.get('mturk', 'host'))
        self.db = HaCRSDB()

    def get_balance(self):
        """Placeholder: performs no request and returns ``None``."""
        #print self.MTconnection.get_account_balance()
        pass

    def expire_all_hits(self):
        """Expire every HIT that is not already expired (best effort).

        A failure to expire one HIT is ignored so the rest are still tried.
        """
        for hit in self.MTconnection.get_all_hits():
            if hit.expired:
                continue
            try:
                self.MTconnection.expire_hit(hit.HITId)
            except Exception:
                # Deliberately ignored: keep going with the remaining HITs.
                pass

    def delete_all_mturk_hits(self):
        """Expire and then dispose of every HIT; errors propagate."""
        for hit in self.MTconnection.get_all_hits():
            self.MTconnection.expire_hit(hit.HITId)
            self.MTconnection.dispose_hit(hit.HITId)

    def get_all_mturk_hits(self):
        """Return whatever ``get_all_hits()`` of the connection yields."""
        return self.MTconnection.get_all_hits()

    # TODO: HITs available via API, but not via Amazon Web Sandbox
    def push_tasklet_mturk(self, keywords):
        """Create one HIT pointing at the tasklet picker for ``keywords``.

        The reward depends on ``keywords``: 'easy' pays 1.00; 'medium',
        'hard' and 'very_hard' pay 2.00; 'priority' pays 4.00.  Any other
        value terminates the process with exit status 1.

        Returns:
            (mturkid, hit_result): the id returned by
            ``db.create_mturk_resource`` and the raw ``create_hit`` result.
        """
        frame_height = self.config.get('mturk', 'frameheight')
        url = "https://cgcturk.hacked.jp/pick_tasklet/{}/".format(keywords)
        if keywords == 'easy':
            amount = 1.00
        elif keywords in ['medium', 'hard', 'very_hard']:
            amount = 2.00
        elif keywords == 'priority':
            amount = 4.00
        else:
            sys.exit(1)

        questionform = ExternalQuestion(url, frame_height)
        title = ('HELP AN AI!!! We are students building an artificial '
                 'intelligence to find bugs in programs to keep the internet '
                 'safe')
        # The description is fixed text; the configured 'shortdescr' value
        # is not used for this kind of HIT.
        sdescription = ('We are students building an artificial intelligence '
                        'system that finds bugs in programs and keeps the '
                        'internet safe from malware. BUT IT NEEDS YOUR HELP! '
                        'Play with programs to find functions that it missed, '
                        'and get $$$!')
        hit_result = self.MTconnection.create_hit(
            title='[{}] {}'.format(keywords, title),
            description=sdescription,
            keywords=keywords,
            max_assignments=1,
            question=questionform,
            reward=Price(amount=amount),
            response_groups=('Minimal', 'HITDetail'),  # ?
        )
        assert len(hit_result) == 1
        mturkid = self.db.create_mturk_resource(hit_result[0].HITId,
                                                hit_result[0].HITGroupId)
        return mturkid, hit_result

    def push_tasks_mturk(self):
        """Create one 0.01-reward HIT per unassigned tasklet and record it.

        Each created HIT is stored, associated with its tasklet and
        committed before the next one is pushed.
        """
        frame_height = self.config.get('mturk', 'frameheight')
        amount = 0.01
        tasklets = self.db.get_unassigned_tasklets()
        sdescription = self.config.get('mturk', 'shortdescr')
        for tasklet in tasklets:
            url = "https://cgcturk.hacked.jp/tasklet/{}/".format(tasklet['id'])
            questionform = ExternalQuestion(url, frame_height)
            hit_result = self.MTconnection.create_hit(
                title=HaCRSUtil.get_tasklet_name(tasklet),
                description=sdescription,
                keywords=["easy"],
                max_assignments=1,
                question=questionform,
                reward=Price(amount=amount),
                response_groups=('Minimal', 'HITDetail'),  # ?
            )
            assert len(hit_result) == 1
            mturkid = self.db.create_mturk_resource(hit_result[0].HITId,
                                                    hit_result[0].HITGroupId)
            self.db.add_mturk_tasklet_association(tasklet['id'], mturkid)
            self.db.commit()

    def show_seed_tasklets(self):
        """Pretty-print the seed tasklets stored in the database."""
        pprint(self.db.get_seed_tasklets())

    def get_hit(self, hitid):
        """Return the first record for ``hitid``, or ``None``.

        ``None`` is returned when the lookup raises or yields ``None``.
        """
        try:
            hit = self.MTconnection.get_hit(hitid)
        except Exception:
            return None
        if hit is not None:
            return hit[0]
        return None

    def get_assignment_from_hit(self, hitid):
        """Return the first assignment of ``hitid``, or ``None``.

        ``None`` is returned when the lookup fails or there is no assignment.
        """
        try:
            return self.MTconnection.get_assignments(hitid)[0]
        except Exception:
            return None

    def get_approved_seeding_tasklets(self):
        """Return the programs of seed tasklets whose first assignment is approved.

        Tasklets whose assignments cannot be fetched are skipped.
        """
        # Close the programs file deterministically instead of leaking it.
        with open(self.config.get('general', 'programsjson')) as handle:
            programs = json.load(handle)
        for program in programs:
            # NOTE(review): the result was never used; the call is kept in
            # case lookup_program has side effects -- confirm and drop.
            self.db.lookup_program(program)

        approved = set()
        for tasklet in self.db.get_latest_seed_tasklets():
            turkinfos = self.db.get_mturk_infos(tasklet['id'])
            try:
                assignments = self.MTconnection.get_assignments(
                    turkinfos['hitid'])
                if not assignments:
                    continue
                if assignments[0].AssignmentStatus == 'Approved':
                    approved.add(self.db.get_tasklet_program(tasklet['id']))
            except Exception:
                # Best effort: a tasklet that cannot be checked is skipped.
                pass
        return list(approved)
class MTurkProvider(object):
    """Wrapper around a boto ``MTurkConnection`` used to publish project tasks as HITs.

    Most thin wrappers below swallow ``MTurkRequestError`` and report failure
    through their return value (``False`` or a ``(value, success)`` tuple)
    instead of raising.
    """

    description = 'This is a task authored by a requester on Daemo, a research crowdsourcing platform. ' \
                  'Mechanical Turk workers are welcome to do it'
    keywords = ['daemo']
    countries = ['US', 'CA']
    min_hits = 1000

    def __init__(self, host, aws_access_key_id, aws_secret_access_key):
        """Open the MTurk connection.

        Args:
            host: base URL of this application; used to build the external
                question URL and the notification endpoint.
            aws_access_key_id: AWS access key of the requester.
            aws_secret_access_key: AWS secret key of the requester.

        Raises:
            ValueError: if ``host`` is empty.
        """
        # Validate first so that an unusable provider never opens a connection.
        if not host:
            raise ValueError("Please provide a host url")
        self.host = host
        # NOTE: the MTurk endpoint comes from settings, not from ``host``.
        self.connection = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host=settings.MTURK_HOST
        )
        self.connection.APIVersion = "2014-08-15"

    def get_connection(self):
        """Return the underlying MTurk connection."""
        return self.connection

    @staticmethod
    def _mturk_system_qualifications(qualification):
        """Translate the supported qualification items into boto requirements.

        Only the ``location``, ``approval_rate`` and ``total_tasks`` attributes
        are translated; every other item is ignored.
        """
        requirements = []
        for item in qualification.items.all():
            attribute = item.expression['attribute']
            if attribute not in ['location', 'approval_rate', 'total_tasks']:
                continue
            requirement = None
            if attribute == 'location':
                op = OP_IN if item.expression['operator'] == 'in' else OP_NOT_IN
                requirement = MultiLocaleRequirement(op, [val.strip() for val in item.expression['value'] if
                                                          val is not None and val != ''])
            elif attribute == 'approval_rate':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = PercentAssignmentsApprovedRequirement(op, item.expression['value'])
            elif attribute == 'total_tasks':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = NumberHitsApprovedRequirement(op, item.expression['value'])
            requirements.append(requirement)
        return requirements

    def get_qualifications(self, project, boomerang_threshold, add_boomerang):
        """Build the qualification requirements for a project's HITs.

        Args:
            project: project whose qualification and boomerang settings apply.
            boomerang_threshold: minimum rating scaled by 100.
            add_boomerang: whether boomerang requirements should be attached.

        Returns:
            tuple: ``(Qualifications, boomerang_qual)`` where ``boomerang_qual``
            is ``None`` when the qualification type could not be created.
        """
        requirements = []
        if project.qualification is not None:
            requirements += self._mturk_system_qualifications(project.qualification)
        boomerang_qual, success = self.create_qualification_type(owner_id=project.owner_id,
                                                                 project_id=project.group_id,
                                                                 name='Boomerang Score #{}'.format(project.group_id),
                                                                 flag=FLAG_Q_BOOMERANG,
                                                                 description='No description available')
        if boomerang_threshold <= int(settings.BOOMERANG_MIDPOINT * 100):
            # At or below the midpoint: exclude workers through one deny-list
            # qualification per wait-list bucket under the threshold.
            for i, bucket in enumerate(WAIT_LIST_BUCKETS):
                if int(bucket[1] * 100) <= boomerang_threshold:
                    boomerang_blacklist, success = \
                        self.create_qualification_type(owner_id=project.owner_id,
                                                       name='Boomerang Waitlist #{}-{}'.format(
                                                           project.group_id, len(WAIT_LIST_BUCKETS) - i),
                                                       flag=FLAG_Q_BOOMERANG,
                                                       description='No description available',
                                                       deny=True,
                                                       project_id=project.group_id,
                                                       bucket=bucket)
                    if success and add_boomerang:
                        requirements.append(
                            BoomerangRequirement(qualification_type_id=boomerang_blacklist.type_id,
                                                 comparator=OP_DNE,
                                                 integer_value=None))
        elif success and add_boomerang:
            # Only touch boomerang_qual after a successful creation: on failure
            # it is None and reading .type_id would raise AttributeError.
            requirements.append(
                BoomerangRequirement(qualification_type_id=boomerang_qual.type_id,
                                     comparator=OP_GTEQ,
                                     integer_value=boomerang_threshold))
        return Qualifications(requirements), boomerang_qual

    def create_hits(self, project, tasks=None, repetition=None):
        """Create (or re-type) one HIT per task of ``project``.

        Args:
            project: project whose tasks are published.
            tasks: optional rows ``(id, repetition, group_id,
                remaining_assignments, min_rating)``; when falsy they are
                loaded with the query below. NOTE(review): caller-supplied
                rows are assumed to have this same shape - confirm.
            repetition: unused here.

        Returns:
            str: ``'SUCCESS'``, ``'FAILURE'`` when a HIT type cannot be
            registered, or ``'FAILED'`` when HIT creation is rejected.
        """
        # if project.min_rating > 0:
        #     return 'NOOP'
        if not tasks:
            cursor = connection.cursor()
            # noinspection SqlResolve
            query = '''
                SELECT
                  max(id) id,
                  repetition,
                  group_id,
                  repetition - sum(existing_assignments) remaining_assignments,
                  min_rating
                FROM (
                       SELECT
                         t_rev.id,
                         t.group_id,
                         t.min_rating,
                         p.repetition,
                         CASE WHEN ma.id IS NULL OR ma.status IN (%(skipped)s, %(rejected)s, %(expired)s)
                           THEN 0
                         ELSE 1 END existing_assignments
                       FROM crowdsourcing_task t
                         INNER JOIN crowdsourcing_project p ON t.project_id = p.id
                         INNER JOIN crowdsourcing_task t_rev ON t_rev.group_id = t.group_id
                         LEFT OUTER JOIN mturk_mturkhit mh ON mh.task_id = t_rev.id
                         LEFT OUTER JOIN mturk_mturkassignment ma ON ma.hit_id = mh.id
                       WHERE t.project_id = (%(project_id)s) AND t_rev.exclude_at IS NULL
                             AND t_rev.deleted_at IS NULL
                     ) t
                GROUP BY group_id, repetition, min_rating
                HAVING sum(existing_assignments) < repetition;
            '''
            cursor.execute(query, {'skipped': TaskWorker.STATUS_SKIPPED,
                                   'rejected': TaskWorker.STATUS_REJECTED,
                                   'expired': TaskWorker.STATUS_EXPIRED,
                                   'project_id': project.id})
            tasks = cursor.fetchall()

        rated_workers = Rating.objects.filter(origin_type=Rating.RATING_REQUESTER).count()
        add_boomerang = rated_workers > 0

        duration = project.timeout if project.timeout is not None else datetime.timedelta(hours=24)
        lifetime = project.deadline - timezone.now() if project.deadline is not None else datetime.timedelta(
            days=7)

        for task in tasks:
            task_id = task[0]
            boomerang_threshold = int(round(task[4], 2) * 100)  # min_rating scaled by 100
            question = self.create_external_question(task_id)
            mturk_hit = MTurkHIT.objects.filter(task_id=task_id).first()
            qualifications, boomerang_qual = self.get_qualifications(project=project,
                                                                     boomerang_threshold=boomerang_threshold,
                                                                     add_boomerang=add_boomerang)
            qualifications_mask = 0
            if qualifications is not None:
                qualifications_mask = FLAG_Q_LOCALE + FLAG_Q_HITS + FLAG_Q_RATE + FLAG_Q_BOOMERANG
            hit_type, success = self.create_hit_type(title=project.name, description=self.description,
                                                     price=project.price,
                                                     duration=duration, keywords=self.keywords,
                                                     approval_delay=datetime.timedelta(days=2),
                                                     qual_req=qualifications,
                                                     qualifications_mask=qualifications_mask,
                                                     boomerang_threshold=boomerang_threshold,
                                                     owner_id=project.owner_id,
                                                     boomerang_qual=boomerang_qual)
            if not success:
                return 'FAILURE'

            if mturk_hit is None:
                try:
                    hit = self.connection.create_hit(hit_type=hit_type.string_id,
                                                     max_assignments=task[3],  # remaining_assignments
                                                     lifetime=lifetime,
                                                     question=question)[0]
                    self.set_notification(hit_type_id=hit.HITTypeId)
                    mturk_hit = MTurkHIT(hit_id=hit.HITId, hit_type=hit_type, task_id=task_id)
                except MTurkRequestError as e:
                    error = e.errors[0][0]
                    if error == 'AWS.MechanicalTurk.InsufficientFunds':
                        # Tell the project owner why publishing stopped.
                        message = {
                            "type": "ERROR",
                            "detail": "Insufficient funds on your Mechanical Turk account!",
                            "code": error
                        }
                        redis_publisher = RedisPublisher(facility='bot', users=[project.owner])
                        message = RedisMessage(json.dumps(message))
                        redis_publisher.publish_message(message)
                    return 'FAILED'
            else:
                if mturk_hit.hit_type_id != hit_type.id:
                    result, success = self.change_hit_type_of_hit(hit_id=mturk_hit.hit_id,
                                                                  hit_type_id=hit_type.string_id)
                    if success:
                        mturk_hit.hit_type = hit_type
            mturk_hit.save()
        return 'SUCCESS'

    def create_hit_type(self, owner_id, title, description, price, duration, boomerang_threshold,
                        keywords=None, approval_delay=None, qual_req=None,
                        qualifications_mask=0, boomerang_qual=None):
        """Return a matching stored HIT type, registering a new one when none exists.

        Returns:
            tuple: ``(hit_type, True)`` on success, ``(None, False)`` when the
            registration request fails.
        """
        hit_type = MTurkHITType.objects.filter(owner_id=owner_id, name=title, description=description,
                                               price=Decimal(str(price)),
                                               duration=duration,
                                               qualifications_mask=qualifications_mask,
                                               boomerang_threshold=boomerang_threshold).first()
        if hit_type is not None:
            return hit_type, True

        reward = Price(price)
        try:
            mturk_ht = self.connection.register_hit_type(title=title, description=description, reward=reward,
                                                         duration=duration, keywords=keywords,
                                                         approval_delay=approval_delay,
                                                         qual_req=qual_req)[0]
            hit_type = MTurkHITType(owner_id=owner_id, name=title, description=description,
                                    price=Decimal(str(price)),
                                    keywords=keywords, duration=duration,
                                    qualifications_mask=qualifications_mask,
                                    boomerang_qualification=boomerang_qual,
                                    boomerang_threshold=boomerang_threshold)
            hit_type.string_id = mturk_ht.HITTypeId
            hit_type.save()
        except MTurkRequestError:
            return None, False
        return hit_type, True

    def create_external_question(self, task, frame_height=800):
        """Build the ``ExternalQuestion`` pointing at this host for task id ``task``."""
        task_hash = Hashids(salt=settings.SECRET_KEY, min_length=settings.ID_HASH_MIN_LENGTH)
        task_id = task_hash.encode(task)
        url = self.host + '/mturk/task/?taskId=' + task_id
        question = ExternalQuestion(external_url=url, frame_height=frame_height)
        return question

    def update_max_assignments(self, task):
        """Grow, expire or revive the HIT of ``task`` based on remaining repetitions.

        Args:
            task: mapping holding the task ``'id'``.

        Returns:
            str: always ``'SUCCESS'``.

        Raises:
            MTurkHIT.DoesNotExist: if the task has no associated HIT.
        """
        task = Task.objects.get(id=task['id'])
        mturk_hit = task.mturk_hit
        if not mturk_hit:
            raise MTurkHIT.DoesNotExist("This task is not associated to any mturk hit")
        assignments_completed = task.task_workers.filter(~Q(status__in=[TaskWorker.STATUS_REJECTED,
                                                                        TaskWorker.STATUS_SKIPPED,
                                                                        TaskWorker.STATUS_EXPIRED])).count()
        remaining_assignments = task.project.repetition - assignments_completed
        if (remaining_assignments > 0 and
                mturk_hit.num_assignments == mturk_hit.mturk_assignments.filter(
                    status=TaskWorker.STATUS_SUBMITTED).count() and
                mturk_hit.mturk_assignments.filter(status=TaskWorker.STATUS_IN_PROGRESS).count() == 0):
            # Every slot was submitted and none is in progress: add one more.
            self.add_assignments(hit_id=mturk_hit.hit_id, increment=1)
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
            mturk_hit.num_assignments += 1
            mturk_hit.save()
        elif remaining_assignments == 0:
            self.expire_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_EXPIRED
            mturk_hit.save()
        elif remaining_assignments > 0 and \
                mturk_hit.status == MTurkHIT.STATUS_EXPIRED:
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
            # Persist the revived status; without this the row stays EXPIRED.
            mturk_hit.save()
        return 'SUCCESS'

    def get_assignment(self, assignment_id):
        """Fetch an assignment.

        Returns:
            tuple: ``(assignment, True)`` on success; ``(assignment_id, False)``
            when MTurk reports an invalid assignment state; ``(None, False)``
            on any other request error.
        """
        try:
            return self.connection.get_assignment(assignment_id)[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.MechanicalTurk.InvalidAssignmentState':
                return assignment_id, False
            return None, False

    def set_notification(self, hit_type_id):
        """Register this host's notification endpoint for a HIT type."""
        self.connection.set_rest_notification(hit_type=hit_type_id,
                                              url=self.host + '/api/mturk/notification',
                                              event_types=['AssignmentReturned', 'AssignmentAbandoned',
                                                           'AssignmentAccepted', 'AssignmentSubmitted'])

    def approve_assignment(self, task_worker):
        """Approve the MTurk assignment linked to ``task_worker`` (mapping with ``'id'``).

        Returns:
            bool: ``False`` only when the approval request fails; ``True``
            otherwise, including when there is no linked assignment.
        """
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        mturk_assignment = None
        if hasattr(task_worker_obj, 'mturk_assignments'):
            mturk_assignment = task_worker_obj.mturk_assignments.first()
        if mturk_assignment is not None:
            try:
                self.connection.approve_assignment(mturk_assignment.assignment_id)
            except MTurkRequestError:
                return False
        return True

    def reject_assignment(self, task_worker):
        """Reject the MTurk assignment linked to ``task_worker`` (mapping with ``'id'``).

        Returns:
            bool: ``False`` only when the rejection request fails; ``True``
            otherwise, including when there is no linked assignment.
        """
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        mturk_assignment = None
        if hasattr(task_worker_obj, 'mturk_assignments'):
            mturk_assignment = task_worker_obj.mturk_assignments.first()
        if mturk_assignment is not None:
            try:
                self.connection.reject_assignment(mturk_assignment.assignment_id)
            except MTurkRequestError:
                return False
        return True

    def expire_hit(self, hit_id):
        """Expire a HIT; return ``False`` if the request fails."""
        try:
            self.connection.expire_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def disable_hit(self, hit_id):
        """Disable a HIT; return ``False`` if the request fails."""
        try:
            self.connection.disable_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def extend_hit(self, hit_id):
        """Extend a HIT's expiration; return ``False`` if the request fails."""
        try:
            self.connection.extend_hit(hit_id=hit_id, expiration_increment=604800)  # 7 days
        except MTurkRequestError:
            return False
        return True

    def add_assignments(self, hit_id, increment=1):
        """Add ``increment`` assignments to a HIT; return ``False`` if the request fails."""
        try:
            self.connection.extend_hit(hit_id=hit_id, assignments_increment=increment)
        except MTurkRequestError:
            return False
        return True

    def test_connection(self):
        """Probe the credentials by requesting the account balance.

        Returns:
            tuple: ``(balance, True)`` on success, ``(None, False)`` on any
            request error.
        """
        try:
            return self.connection.get_account_balance()[0], True
        except MTurkRequestError:
            return None, False

    def get_account_balance(self):
        """Return the account balance, or ``None`` if the request fails."""
        try:
            return self.connection.get_account_balance()[0]
        except MTurkRequestError:
            return None

    def create_qualification_type(self, owner_id, name, flag, description, project_id, auto_granted=False,
                                  auto_granted_value=None, deny=False, bucket=None):
        """Get or create a qualification type and assign it to the rated workers.

        Args:
            owner_id: requester whose ratings are read.
            name: qualification name, also the lookup key together with
                ``owner_id`` and ``flag``.
            flag: qualification flag stored on the record.
            description: description sent to MTurk on creation.
            project_id: project group whose ratings are considered.
            auto_granted: passed through to MTurk on creation.
            auto_granted_value: passed through to MTurk on creation.
            deny: when true together with ``bucket``, restrict to workers
                whose rating falls inside the bucket (deny-list qualification).
            bucket: ``(lower_bound, upper_bound)`` rating pair.

        Returns:
            tuple: ``(qualification, True)``, or ``(None, False)`` when the
            qualification type cannot be created on MTurk.
        """
        # noinspection SqlResolve
        query = '''
            SELECT * FROM (
                SELECT
                  task.target_id,
                  task.username,
                  round(task.task_w_avg::NUMERIC, 2) rating
                  --round(coalesce(task.task_w_avg, requester.requester_w_avg,
                  --  platform.platform_w_avg)::NUMERIC, 2) rating
                FROM (
                       SELECT
                         target_id,
                         origin_id,
                         project_id,
                         username,
                         sum(weight * power((%(BOOMERANG_TASK_ALPHA)s), t.row_number))
                         / sum(power((%(BOOMERANG_TASK_ALPHA)s), t.row_number)) task_w_avg
                       FROM (
                              SELECT
                                r.id,
                                r.origin_id,
                                p.group_id project_id,
                                weight,
                                r.target_id,
                                -1 + row_number()
                                OVER (PARTITION BY target_id
                                  ORDER BY tw.created_at DESC) AS row_number,
                                u.username username
                              FROM crowdsourcing_rating r
                                INNER JOIN crowdsourcing_task t ON t.id = r.task_id
                                INNER JOIN crowdsourcing_project p ON p.id = t.project_id
                                INNER JOIN crowdsourcing_taskworker tw ON t.id = tw.task_id
                                  AND tw.worker_id=r.target_id
                                INNER JOIN auth_user u ON u.id = r.target_id
                              WHERE origin_id = (%(origin_id)s) AND origin_type = (%(origin_type)s)) t
                       GROUP BY origin_id, target_id, project_id, username)
                     task WHERE task.project_id = (%(project_id)s)
            ) r
        '''
        extra_query = 'WHERE rating BETWEEN (%(lower_bound)s) AND (%(upper_bound)s);'
        params = {
            'origin_type': Rating.RATING_REQUESTER, 'origin_id': owner_id, 'project_id': project_id,
            'BOOMERANG_REQUESTER_ALPHA': settings.BOOMERANG_REQUESTER_ALPHA,
            'BOOMERANG_PLATFORM_ALPHA': settings.BOOMERANG_PLATFORM_ALPHA,
            'BOOMERANG_TASK_ALPHA': settings.BOOMERANG_TASK_ALPHA
        }
        obj_params = {'upper_bound': 300, 'lower_bound': 100}
        if deny and bucket is not None:
            query += extra_query
            params.update({'upper_bound': bucket[1], 'lower_bound': bucket[0]})
            obj_params.update({'upper_bound': bucket[1] * 100, 'lower_bound': bucket[0] * 100,
                               'is_blacklist': True})
        cursor = connection.cursor()
        cursor.execute(query, params=params)
        worker_ratings_raw = cursor.fetchall()
        worker_ratings = [{"worker_id": r[0], "worker_username": r[1], "rating": r[2]} for r in
                          worker_ratings_raw]

        qualification = MTurkQualification.objects.filter(owner_id=owner_id, flag=flag, name=name).first()
        assigned_workers = set()
        if qualification is None:
            try:
                qualification_type = self.connection.create_qualification_type(
                    name=name, description=description,
                    status='Active',
                    auto_granted=auto_granted,
                    auto_granted_value=auto_granted_value)[0]
                qualification = MTurkQualification.objects.create(owner_id=owner_id, flag=flag, name=name,
                                                                  description=description,
                                                                  auto_granted=auto_granted,
                                                                  auto_granted_value=auto_granted_value,
                                                                  type_id=qualification_type.QualificationTypeId,
                                                                  **obj_params)
            except MTurkRequestError:
                return None, False
        else:
            # Materialise once as a set so the membership tests below are O(1).
            assigned_workers = set(MTurkWorkerQualification.objects.values('worker').filter(
                qualification=qualification).values_list('worker', flat=True))

        for rating in worker_ratings:
            # Only usernames of the form "mturk.<worker id>" map to MTurk workers.
            user_name = rating["worker_username"].split('.')
            if len(user_name) == 2 and user_name[0] == 'mturk':
                mturk_worker_id = user_name[1].upper()
                score = int(rating['rating'] * 100)
                if mturk_worker_id not in assigned_workers:
                    self.assign_qualification(
                        qualification_type_id=qualification.type_id, worker_id=mturk_worker_id,
                        value=score)
                defaults = {
                    'qualification': qualification,
                    'worker': mturk_worker_id,
                    'score': score
                }
                MTurkWorkerQualification.objects.update_or_create(qualification=qualification,
                                                                  worker=mturk_worker_id,
                                                                  defaults=defaults)
        return qualification, True

    def change_hit_type_of_hit(self, hit_id, hit_type_id):
        """Move a HIT to another HIT type.

        Returns:
            tuple: ``(result, True)`` on success, ``(None, False)`` on a
            request error.
        """
        try:
            result = self.connection.change_hit_type_of_hit(hit_id=hit_id, hit_type=hit_type_id)
        except MTurkRequestError:
            return None, False
        return result, True

    def update_worker_boomerang(self, project_id, worker_id, task_avg, requester_avg):
        """
        Update boomerang for project
        Args:
            project_id: project group id used to find a HIT of the project
            worker_id: MTurk worker whose score is updated
            task_avg: task-level rating; stored scaled by 100
            requester_avg: currently unused (see commented-out code below)

        Returns:
            str: always 'SUCCESS'
        """
        hit = MTurkHIT.objects.select_related('hit_type__boomerang_qualification').filter(
            task__project__group_id=project_id).first()
        if hit is not None:
            qualification = hit.hit_type.boomerang_qualification
            worker_qual = MTurkWorkerQualification.objects.filter(qualification=qualification,
                                                                  worker=worker_id).first()
            if worker_qual is not None:
                self.update_score(worker_qual, score=int(task_avg * 100), override=True)
            else:
                MTurkWorkerQualification.objects.create(qualification=qualification, worker=worker_id,
                                                        score=int(task_avg * 100), overwritten=True)
                self.assign_qualification(qualification_type_id=qualification.type_id, worker_id=worker_id,
                                          value=int(task_avg * 100))

                # other_quals = MTurkWorkerQualification.objects.filter(~Q(qualification=qualification),
                #                                                       worker=worker_id,
                #                                                       overwritten=False)
                # for q in other_quals:
                #     self.update_score(q, score=int(requester_avg * 100))
        return 'SUCCESS'

    def update_score(self, worker_qual, score, override=False):
        """Push a new qualification score to MTurk and mirror it locally.

        Returns:
            bool: ``False`` when ``worker_qual`` is ``None`` or the request
            fails, ``True`` otherwise.
        """
        if worker_qual is None:
            return False
        try:
            self.connection.update_qualification_score(worker_qual.qualification.type_id, worker_qual.worker,
                                                       score)
            worker_qual.overwritten = override
            worker_qual.score = score
            worker_qual.save()
        except MTurkRequestError:
            return False
        return True

    def assign_qualification(self, qualification_type_id, worker_id, value=1):
        """
        Assign a qualification to a WorkerId (no notification is sent)
        Args:
            qualification_type_id: MTurk qualification type id
            worker_id: MTurk worker id
            value: integer value of the qualification

        Returns:
            bool: False if the request fails
        """
        try:
            self.connection.assign_qualification(qualification_type_id, worker_id, value,
                                                 send_notification=False)
            return True
        except MTurkRequestError:
            return False

    def revoke_qualification(self, qualification_type_id, worker_id):
        """Revoke a qualification from a worker; return ``False`` if the request fails."""
        try:
            self.connection.revoke_qualification(qualification_type_id=qualification_type_id,
                                                 subject_id=worker_id)
            return True
        except MTurkRequestError:
            return False

    def notify_workers(self, worker_ids, subject, message_text):
        """Send a message to workers; return ``False`` if the request fails."""
        try:
            self.connection.notify_workers(worker_ids, subject, message_text)
            return True
        except MTurkRequestError:
            return False
def _missing_notification_email(submitted, whimsical, assignment_id, duration, p_time):
    """Build the researcher email describing a missed MTurk notification.

    Args:
        submitted: True for a missed "submitted" notification (minor error),
            False for a missed abandoned/returned notification (major error).
        whimsical: truthy selects the "Alfred R. Wallace" letter style.
        assignment_id: id of the affected assignment.
        duration: allowed time in seconds.
        p_time: time since the participant started, in seconds.

    Returns:
        MIMEText: message with its ``Subject`` header set.
    """
    if whimsical:
        if submitted:
            template = (
                "Dearest Friend,\n\nI am writing to let you know that at {}, "
                "during my regular (and thoroughly enjoyable) perousal of the "
                "most charming participant data table, I happened to notice "
                "that assignment {} has been taking longer than we were "
                "expecting. I recall you had suggested {} minutes as an upper "
                "limit for what was an acceptable length of time for each "
                "assignement , however this assignment had been underway for "
                "a shocking {} minutes, a full {} minutes over your "
                "allowance. I immediately dispatched a telegram to our mutual "
                "friends at AWS and they were able to assure me that although "
                "the notification had failed to be correctly processed, the "
                "assignment had in fact been completed. Rather than trouble "
                "you, I dealt with this myself and I can assure you there is "
                "no immediate cause for concern. Nonetheless, for my own "
                "peace of mind, I would appreciate you taking the time to "
                "look into this matter at your earliest convenience.\n\nI "
                "remain your faithful and obedient servant, \nAlfred R. "
                "Wallace\n\n P.S. Please do not respond to this message, I am "
                "busy with other matters.")
            subject = "A matter of minor concern."
        else:
            template = (
                "Dearest Friend,\n\nI am afraid I write to you with most "
                "grave tidings. At {}, during a routine check of the usually "
                "most delightful participant data table, I happened to notice "
                "that assignment {} has been taking longer than we were "
                "expecting. I recall you had suggested {} minutes as an upper "
                "limit for what was an acceptable length of time for each "
                "assignment, however this assignment had been underway for a "
                "shocking {} minutes, a full {} minutes over your allowance. "
                "I immediately dispatched a telegram to our mutual friends at "
                "AWS and they infact informed me that they had already sent "
                "us a notification which we must have failed to process, "
                "implying that the assignment had not been successfully "
                "completed. Of course when the seriousness of this scenario "
                "dawned on me I had to depend on my trusting walking stick "
                "for support: without the notification I didn't know to "
                "remove the old assignment's data from the tables and AWS "
                "will have already sent their replacement, meaning that the "
                "tables may already be in a most unsound state!\n\nI am sorry "
                "to trouble you with this, however, I do not know how to "
                "proceed so rather than trying to remedy the scenario myself, "
                "I have instead temporarily ceased operations by expiring the "
                "HIT with the fellows at AWS and have refrained from posting "
                "any further invitations myself. Once you see fit I would be "
                "most appreciative if you could attend to this issue with the "
                "caution, sensitivity and intelligence for which I know you "
                "so well.\n\nI remain your faithful and obedient servant,"
                "\nAlfred R. Wallace\n\nP.S. Please do not respond to this "
                "message, I am busy with other matters.")
            subject = "Most troubling news."
        body = template.format(
            datetime.now(),
            assignment_id,
            round(duration/60),
            round(p_time/60),
            round((p_time-duration)/60))
    else:
        if submitted:
            template = (
                "Dear experimenter,\n\nThis is an automated email from "
                "Wallace. You are receiving this email because the Wallace "
                "platform has discovered evidence that a notification from "
                "Amazon Web Services failed to arrive at the server. Wallace "
                "has automatically contacted AWS and has determined the "
                "dropped notification was a submitted notification (i.e. the "
                "participant has finished the experiment). This is a "
                "non-fatal error and so Wallace has auto-corrected the "
                "problem. Nonetheless you may wish to check the database."
                "\n\nBest,\nThe Wallace dev. team.\n\n Error details:"
                "\nAssignment: {} \nAllowed time: {}"
                "\nTime since participant started: {}")
            subject = "Wallace automated email - minor error."
        else:
            template = (
                "Dear experimenter,\n\nThis is an automated email from "
                "Wallace. You are receiving this email because the Wallace "
                "platform has discovered evidence that a notification from "
                "Amazon Web Services failed to arrive at the server. Wallace "
                "has automatically contacted AWS and has determined the "
                "dropped notification was an abandoned/returned notification "
                "(i.e. the participant had returned the experiment or had run "
                "out of time). This is a serious error and so Wallace has "
                "paused the experiment - expiring the HIT on MTurk and "
                "setting auto_recruit to false. Participants currently "
                "playing will be able to finish, however no further "
                "participants will be recruited until you do so manually. We "
                "strongly suggest you use the details below to check the "
                "database to make sure the missing notification has not "
                "caused additional problems before resuming.\nIf you are "
                "receiving a lot of these emails this suggests something is "
                "wrong with your experiment code.\n\nBest, \nThe Wallace dev. "
                "team.\n\n Error details:\nAssignment: {} \nAllowed time: {}"
                "\nTime since participant started: {}")
            subject = "Wallace automated email - major error."
        # Format the template string itself; calling .format on the MIMEText
        # object raised AttributeError in these branches.
        body = template.format(
            assignment_id,
            round(duration/60),
            round(p_time/60))

    msg = MIMEText(body)
    msg['Subject'] = subject
    return msg


def check_db_for_missing_notifications():
    """Check the database for missing notifications.

    For every participant still marked "working" whose elapsed time exceeds
    the configured HIT duration plus 120 seconds, ask AWS for the assignment
    status and reconcile:

    * Approved / Rejected: store that status on the participant.
    * Submitted: re-post an AssignmentSubmitted notification to this app.
    * anything else (including a failed lookup): set auto_recruit to false on
      Heroku, expire the HIT and post a NotificationMissing notification.
    """
    aws_access_key_id = os.environ['aws_access_key_id']
    aws_secret_access_key = os.environ['aws_secret_access_key']
    if config.getboolean('Shell Parameters', 'launch_in_sandbox_mode'):
        conn = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host='mechanicalturk.sandbox.amazonaws.com')
    else:
        conn = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key)

    # get all participants that are still working
    participants = Participant.query.filter_by(status="working").all()

    # get current time
    current_time = datetime.now()

    # get experiment duration in seconds (configured in hours)
    duration = float(config.get('HIT Configuration', 'duration')) * 60 * 60

    # for each participant, if current_time - start_time > duration + 2 mins
    for p in participants:
        p_time = (current_time - p.creation_time).total_seconds()
        if p_time <= (duration + 120):
            continue

        print("Error: participant {} with status {} has been playing for too "
              "long and no notification has arrived - "
              "running emergency code".format(p.id, p.status))

        # get their assignment
        assignment_id = p.assignment_id

        # ask amazon for the status of the assignment; a failed lookup is
        # deliberately treated like an unknown status (handled by the final
        # branch below)
        try:
            assignment = conn.get_assignment(assignment_id)[0]
            status = assignment.AssignmentStatus
        except Exception:
            status = None
        print("assignment status from AWS is {}".format(status))
        hit_id = p.hit_id

        # general email settings (only needed by the disabled SMTP code
        # below); format() keeps this from raising when the variable is unset
        username = os.getenv('wallace_email_username')
        fromaddr = "{}@gmail.com".format(username)
        email_password = os.getenv("wallace_email_key")
        toaddr = config.get('HIT Configuration', 'contact_email_on_error')
        whimsical = os.getenv("whimsical")

        if status == "Approved":
            # if its been approved, set the status accordingly
            print("status set to approved")
            p.status = "approved"
            session.commit()
        elif status == "Rejected":
            print("status set to rejected")
            # if its been rejected, set the status accordingly
            p.status = "rejected"
            session.commit()
        elif status == "Submitted":
            # if it has been submitted then resend a submitted notification
            args = {
                'Event.1.EventType': 'AssignmentSubmitted',
                'Event.1.AssignmentId': assignment_id
            }
            requests.post(
                "http://" + os.environ['HOST'] + '/notifications',
                data=args)

            # build the email that lets the researcher know
            msg = _missing_notification_email(
                True, whimsical, assignment_id, duration, p_time)

            # This method commented out as gmail now blocks emails from
            # new locations
            # server = smtplib.SMTP('smtp.gmail.com:587')
            # server.starttls()
            # server.login(username, email_password)
            # server.sendmail(fromaddr, toaddr, msg.as_string())
            # server.quit()
            print("Error - submitted notification for participant {} missed. "
                  "Database automatically corrected, but proceed with caution."
                  .format(p.id))
        else:
            # if it has not been submitted shut everything down
            # first turn off autorecruit
            host = os.environ['HOST']
            # the Heroku app name is HOST without its domain suffix; only
            # strip the suffix when it is actually present
            suffix = ".herokuapp.com"
            if host.endswith(suffix):
                host = host[:-len(suffix)]
            args = json.dumps({"auto_recruit": "false"})
            headers = {
                "Accept": "application/vnd.heroku+json; version=3",
                "Content-Type": "application/json"
            }
            heroku_email_address = os.getenv('heroku_email_address')
            heroku_password = os.getenv('heroku_password')
            requests.patch(
                "https://api.heroku.com/apps/{}/config-vars".format(host),
                data=args,
                auth=(heroku_email_address, heroku_password),
                headers=headers)

            # then force expire the hit via boto
            conn.expire_hit(hit_id)

            # build the email that lets the researcher know
            msg = _missing_notification_email(
                False, whimsical, assignment_id, duration, p_time)

            # This method commented out as gmail now blocks emails from
            # new locations
            # server = smtplib.SMTP('smtp.gmail.com:587')
            # server.starttls()
            # server.login(username, email_password)
            # server.sendmail(fromaddr, toaddr, msg.as_string())
            # server.quit()

            # send a notificationmissing notification
            args = {
                'Event.1.EventType': 'NotificationMissing',
                'Event.1.AssignmentId': assignment_id
            }
            requests.post(
                "http://" + os.environ['HOST'] + '/notifications',
                data=args)

            print("Error - abandoned/returned notification for participant {} missed. "
                  "Experiment shut down. Please check database and then manually "
                  "resume experiment."
                  .format(p.id))
# Expire every HIT recorded in the local ``hits`` table of crowdstorming.db
# on the Mechanical Turk sandbox.
import datetime
import sys
import sqlite3

from boto.pyami.config import Config, BotoConfigLocations
from boto.mturk.connection import MTurkConnection
from boto.mturk.question import QuestionContent,Question,QuestionForm,Overview,AnswerSpecification,SelectionAnswer,FormattedContent,FreeTextAnswer
from boto.mturk.qualification import Qualifications,PercentAssignmentsApprovedRequirement

# BOTO Configuration
config = Config()
AWS_ID = config.get('Credentials', 'aws_access_key_id', None)
# The standard boto option is ``aws_secret_access_key``; the misspelled
# ``aws_secret_access_key_id`` is still honoured for existing config files.
SECRET_ID = config.get('Credentials', 'aws_secret_access_key', None)
if SECRET_ID is None:
    SECRET_ID = config.get('Credentials', 'aws_secret_access_key_id', None)
HOST = 'mechanicalturk.sandbox.amazonaws.com'

mt = MTurkConnection(
    aws_access_key_id=AWS_ID,
    aws_secret_access_key=SECRET_ID,
    host=HOST
    )

# Read all rows up front so the database handle can be released before the
# (slow) network calls start.
database = sqlite3.connect('crowdstorming.db')
try:
    db = database.cursor()
    db.execute("select * from hits")
    allHits = db.fetchall()
finally:
    database.close()

for hit in allHits:
    # the second column of each row is used as the HIT id
    hitID = str(hit[1])
    mt.expire_hit(hitID)
    print('expired HIT: ' + hitID)
class MTurkServices:
    """Convenience layer over boto's ``MTurkConnection`` driven by a config object.

    ``config`` must offer ``get``/``getboolean``/``getint``/``getfloat`` with
    ``(section, option)`` arguments; the sections read are
    ``'HIT Configuration'`` and ``'AWS Access'``.
    """

    def __init__(self, config):
        self.config = config

    def _mturk_host(self):
        """Return the sandbox or production endpoint according to the config."""
        if self.config.getboolean('HIT Configuration', 'using_sandbox'):
            return 'mechanicalturk.sandbox.amazonaws.com'
        return 'mechanicalturk.amazonaws.com'

    def get_active_hits(self):
        """Return a list of dicts describing every non-expired HIT.

        Returns ``False`` if the request to MTurk fails.
        """
        self.connect_to_turk()
        # hits = self.mtc.search_hits()
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError:
            return False
        active_hits = [hit for hit in hits if not hit.expired]
        hits_data = [{
            'hitid': hit.HITId,
            'title': hit.Title,
            'status': hit.HITStatus,
            'max_assignments': hit.MaxAssignments,
            'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
            # was populated from NumberOfAssignmentsCompleted by mistake
            'number_assignments_pending': hit.NumberOfAssignmentsPending,
            'number_assignments_available': hit.NumberOfAssignmentsAvailable,
            'creation_time': hit.CreationTime,
            'expiration': hit.Expiration,
        } for hit in active_hits]
        return hits_data

    def get_workers(self):
        """Return submitted assignments of the HITs returned by ``search_hits``.

        Returns a list of dicts, or ``False`` if a request to MTurk fails.
        """
        self.connect_to_turk()
        try:
            hits = self.mtc.search_hits(sort_direction='Descending', page_size=20)
            hit_ids = [hit.HITId for hit in hits]
            workers_nested = [self.mtc.get_assignments(
                hit_id,
                status="Submitted",
                sort_by='SubmitTime',
                page_size=100
            ) for hit_id in hit_ids]
            workers = [val for subl in workers_nested for val in subl]  # Flatten nested lists
        except MTurkRequestError:
            return False
        completed_workers = [worker for worker in workers if worker.AssignmentStatus == "Submitted"]
        worker_data = [{
            'hitId': worker.HITId,
            'assignmentId': worker.AssignmentId,
            'workerId': worker.WorkerId,
            'submit_time': worker.SubmitTime,
            'accept_time': worker.AcceptTime
        } for worker in completed_workers]
        return worker_data

    def approve_worker(self, assignment_id):
        """Approve an assignment; returns ``False`` on a request error, else ``None``."""
        self.connect_to_turk()
        try:
            self.mtc.approve_assignment(assignment_id, feedback=None)
        except MTurkRequestError:
            return False

    def reject_worker(self, assignment_id):
        """Reject an assignment; returns ``False`` on a request error, else ``None``."""
        self.connect_to_turk()
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
        except MTurkRequestError:
            return False

    def verify_aws_login(self, key_id, secret_key):
        """Check the given credentials by requesting the account balance.

        Replaces ``self.mtc`` with a connection built from these credentials.
        Returns 1 when the request succeeds and 0 otherwise.
        """
        mturkparams = dict(
            aws_access_key_id=key_id,
            aws_secret_access_key=secret_key,
            host=self._mturk_host())
        self.mtc = MTurkConnection(**mturkparams)
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as e:
            print(e.error_message)
            print('AWS Credentials invalid')
            return 0
        else:
            print('AWS Credentials valid')
            return 1

    def connect_to_turk(self):
        """(Re)create ``self.mtc`` from the credentials stored in the config."""
        mturkparams = dict(
            aws_access_key_id=self.config.get('AWS Access', 'aws_access_key_id'),
            aws_secret_access_key=self.config.get('AWS Access', 'aws_secret_access_key'),
            host=self._mturk_host())
        self.mtc = MTurkConnection(**mturkparams)

    def configure_hit(self):
        """Assemble ``self.paramdict``, the keyword arguments for ``create_hit``."""
        # Configure portal
        experimentPortalURL = self.config.get('HIT Configuration', 'question_url')
        frameheight = 600
        mturkQuestion = ExternalQuestion(experimentPortalURL, frameheight)

        # Qualification:
        quals = Qualifications()
        approve_requirement = self.config.get('HIT Configuration', 'Approve_Requirement')
        quals.add(
            PercentAssignmentsApprovedRequirement("GreaterThanOrEqualTo",
                                                  approve_requirement))
        if self.config.getboolean('HIT Configuration', 'US_only'):
            quals.add(LocaleRequirement("EqualTo", "US"))

        # Specify all the HIT parameters (lifetime and duration are in hours)
        self.paramdict = dict(
            hit_type=None,
            question=mturkQuestion,
            lifetime=datetime.timedelta(hours=self.config.getfloat('HIT Configuration', 'HIT_lifetime')),
            max_assignments=self.config.getint('HIT Configuration', 'max_assignments'),
            title=self.config.get('HIT Configuration', 'title'),
            description=self.config.get('HIT Configuration', 'description'),
            keywords=self.config.get('HIT Configuration', 'keywords'),
            reward=self.config.getfloat('HIT Configuration', 'reward'),
            duration=datetime.timedelta(hours=self.config.getfloat('HIT Configuration', 'duration')),
            approval_delay=None,
            questions=None,
            qualifications=quals
        )

    def is_signed_up(self):
        """Return True when both AWS keys differ from the template placeholders."""
        access_key_id = self.config.get('AWS Access', 'aws_access_key_id')
        access_key = self.config.get('AWS Access', 'aws_secret_access_key')
        return (access_key_id != 'YourAccessKeyId') and \
               (access_key != 'YourSecreteAccessKey')

    def check_balance(self):
        """Return the account balance, or ``'-'`` when no credentials are configured."""
        if self.is_signed_up():
            self.connect_to_turk()
            return self.mtc.get_account_balance()[0]
        else:
            return '-'

    # TODO (if valid AWS credentials haven't been provided then connect_to_turk() will
    # fail, not error checking here and elsewhere)
    def create_hit(self):
        """Create a HIT from the configured parameters and remember its id in ``self.hitid``."""
        self.connect_to_turk()
        self.configure_hit()
        myhit = self.mtc.create_hit(**self.paramdict)[0]
        self.hitid = myhit.HITId

    # TODO(Jay): Have a wrapper around functions that serializes them.
    # Default output should not be serialized.
    def expire_hit(self, hitid):
        """Expire the HIT ``hitid``."""
        self.connect_to_turk()
        self.mtc.expire_hit(hitid)

    def extend_hit(self, hitid, assignments_increment=None, expiration_increment=None):
        """Extend a HIT by extra assignments and/or extra lifetime.

        Args:
            hitid: id of the HIT to extend.
            assignments_increment: number of assignments to add, or ``None``
                to leave the assignment count alone.
            expiration_increment: value multiplied by 60 before being sent as
                the expiration increment, or ``None`` to leave the expiration
                alone.
        """
        self.connect_to_turk()
        # Each increment is optional (both default to None); converting an
        # omitted one with int() raised TypeError.
        if assignments_increment is not None:
            self.mtc.extend_hit(hitid, assignments_increment=int(assignments_increment))
        if expiration_increment is not None:
            self.mtc.extend_hit(hitid, expiration_increment=int(expiration_increment)*60)

    def get_summary(self):
        """Return the balance as a ``jsonify`` response, or ``False`` on a request error."""
        try:
            balance = self.check_balance()
            summary = jsonify(balance=str(balance))
            return summary
        except MTurkRequestError as e:
            print(e.error_message)
            return False