def email_worker(self, subject, message_text):
    """Email this agent's worker through the manager and log the outcome.

    Returns True when the manager's response contains the key 'success' and
    False when it contains 'failure'. If neither key is present the function
    falls through and implicitly returns None.
    """
    outcome = self.manager.email_worker(
        worker_id=self.worker_id,
        subject=subject,
        message_text=message_text,
    )
    if 'success' in outcome:
        sent_note = 'Email sent to worker ID: {}: Subject: {}: Text: {}'.format(
            self.worker_id, subject, message_text
        )
        shared_utils.print_and_log(logging.INFO, sent_note)
        return True
    if 'failure' in outcome:
        failed_note = "Unable to send email to worker ID: {}. Error: {}".format(
            self.worker_id, outcome['failure']
        )
        shared_utils.print_and_log(logging.WARN, failed_note)
        return False
def send_fb_message(self, receiver_id, message, is_response,
                    quick_replies=None):
    """Post a text message to a Messenger recipient via the Graph API.

    ``quick_replies``, when given, is mapped through ``create_reply_option``
    (each entry used for both arguments). ``create_text_message`` then
    produces the message bodies; each body whose 'text' is non-empty is
    POSTed separately with ``self.auth_args`` as query parameters.

    Returns the list of decoded JSON responses, one per body actually sent.
    """
    api_address = 'https://graph.facebook.com/v2.6/me/messages'
    if quick_replies is not None:
        quick_replies = [
            create_reply_option(option, option) for option in quick_replies
        ]
    bodies = create_text_message(message, quick_replies)
    sent_results = []
    for body in bodies:
        # Skip blank messages
        if body['text'] == '':
            continue
        if is_response:
            messaging_type = 'RESPONSE'
        else:
            messaging_type = 'UPDATE'
        request_json = {
            "messaging_type": messaging_type,
            "recipient": {"id": receiver_id},
            "message": body,
        }
        reply = requests.post(
            api_address, params=self.auth_args, json=request_json
        )
        decoded = reply.json()
        shared_utils.print_and_log(
            logging.INFO,
            '"Facebook response from message send: {}"'.format(decoded),
        )
        sent_results.append(decoded)
    return sent_results
def pay_bonus(self, worker_id, bonus_amount, assignment_id, reason,
              unique_request_token):
    """Pay a bonus to a worker after checking the account balance.

    The cost is computed with ``mturk_utils.calculate_mturk_cost`` and
    compared against the balance via ``mturk_utils.check_mturk_balance``.

    Returns False (after logging a warning) when the balance check fails,
    and True once ``send_bonus`` has been called on the MTurk client.
    """
    bonus_cost = mturk_utils.calculate_mturk_cost(
        payment_opt={'type': 'bonus', 'amount': bonus_amount}
    )
    has_funds = mturk_utils.check_mturk_balance(
        balance_needed=bonus_cost, is_sandbox=self.is_sandbox
    )
    if not has_funds:
        shared_utils.print_and_log(
            logging.WARN,
            'Cannot pay bonus. Reason: Insufficient '
            'funds in your MTurk account.',
            should_print=True,
        )
        return False

    mturk_client = mturk_utils.get_mturk_client(self.is_sandbox)
    # unique_request_token may be useful for handling future network errors
    mturk_client.send_bonus(
        WorkerId=worker_id,
        BonusAmount=str(bonus_amount),
        AssignmentId=assignment_id,
        Reason=reason,
        UniqueRequestToken=unique_request_token,
    )
    paid_note = 'Paid ${} bonus to WorkerId: {}'.format(bonus_amount, worker_id)
    shared_utils.print_and_log(logging.INFO, paid_note)
    return True
def on_socket_open(*args):
    """Socket-open callback: log, announce the world, then mark it alive.

    ``self`` is not a parameter here; it is resolved from the enclosing
    scope, which lies outside this block.
    """
    opened_note = 'Socket open: {}'.format(args)
    shared_utils.print_and_log(logging.DEBUG, opened_note)
    self._send_world_alive()
    self.alive = True
def send_fb_payload(self, receiver_id, payload):
    """Post an attachment payload to a Messenger recipient.

    A payload of type 'list' has its 'data' passed through
    ``create_compact_list_message``; any other type sends ``payload['data']``
    unchanged as the attachment.

    Returns the decoded JSON response of the POST.
    """
    api_address = 'https://graph.facebook.com/v2.6/me/messages'
    attachment = (
        create_compact_list_message(payload['data'])
        if payload['type'] == 'list'
        else payload['data']
    )
    request_json = {
        "messaging_type": 'RESPONSE',
        "recipient": {"id": receiver_id},
        "message": {"attachment": attachment},
    }
    reply = requests.post(
        api_address, params=self.auth_args, json=request_json
    )
    decoded = reply.json()
    shared_utils.print_and_log(
        logging.INFO,
        '"Facebook response from message send: {}"'.format(decoded),
    )
    return decoded
def send_message(self, receiver_id, assignment_id, data,
                 blocking=True, ack_func=None):
    """Wrap ``data`` in a message Packet, record it, and queue it to send.

    ``data`` is modified in place: its 'type' is overwritten with the
    message type and a 'message_id' (uuid4 string) is added when absent.
    When an agent exists for (receiver_id, assignment_id) the packet data
    is appended to that agent's message list so it can be resent on a
    reconnect event.
    """
    data['type'] = data_model.MESSAGE_TYPE_MESSAGE
    # Force messages to have a unique ID
    if 'message_id' not in data:
        data['message_id'] = str(uuid.uuid4())

    event_id = shared_utils.generate_event_id(receiver_id)
    sender_id = self.socket_manager.get_my_sender_id()
    outgoing = Packet(
        event_id,
        Packet.TYPE_MESSAGE,
        sender_id,
        receiver_id,
        assignment_id,
        data,
        blocking=blocking,
        ack_func=ack_func,
    )
    shared_utils.print_and_log(
        logging.INFO,
        'Manager sending: {}'.format(outgoing),
        should_print=self.opt['verbose'],
    )

    # Keep the outgoing message on the agent so it can be resent on a
    # reconnect event
    recipient = self._get_agent(receiver_id, assignment_id)
    if recipient is not None:
        recipient.state.messages.append(outgoing.data)
    self.socket_manager.queue_packet(outgoing)
def open_channel(self, worker_id, assignment_id):
    """Open a channel for a worker/assignment and start its sender thread.

    The channel is keyed by ``'<worker_id>_<assignment_id>'``. If a queue
    already exists for that key and its run flag is set, the call only logs
    and returns. Otherwise the run flag is set, a fresh PriorityQueue is
    created, and a daemon thread named ``Socket-Queue-<connection_id>`` is
    started to service that queue.
    """
    connection_id = '{}_{}'.format(worker_id, assignment_id)
    if connection_id in self.queues and self.run[connection_id]:
        shared_utils.print_and_log(
            logging.DEBUG,
            'Channel ({}) already open'.format(connection_id)
        )
        return
    self.run[connection_id] = True
    self.queues[connection_id] = PriorityQueue()

    def channel_thread():
        """Handler thread for monitoring a single channel"""
        # Runs until the channel's run flag is cleared
        while self.run[connection_id]:
            try:
                # Check if client is still alive
                if (time.time() - self.last_heartbeat[connection_id]
                        > self.socket_dead_timeout):
                    self.run[connection_id] = False
                    self.socket_dead_callback(worker_id, assignment_id)

                # Make sure the queue still exists
                if connection_id not in self.queues:
                    self.run[connection_id] = False
                    break

                # Get first item in the queue, check if we can send it yet.
                # A non-blocking get raises Empty when nothing is queued.
                item = self.queues[connection_id].get(block=False)
                t = item[0]
                if time.time() < t:
                    # Put the item back into the queue,
                    # it's not time to pop yet
                    self._safe_put(connection_id, item)
                else:
                    # Try to send the packet
                    packet = item[1]
                    if not packet:
                        # This packet was deleted out from under us
                        continue
                    if packet.status is not Packet.STATUS_ACK:
                        # either need to send initial packet
                        # or resend not-acked packet
                        self._send_packet(packet, connection_id, t)
            except Empty:
                pass
            finally:
                # Runs on every pass, including the `continue` and `break`
                # paths above
                time.sleep(shared_utils.THREAD_MEDIUM_SLEEP)

    # Setup and run the channel sending thread
    self.threads[connection_id] = threading.Thread(
        target=channel_thread,
        name='Socket-Queue-{}'.format(connection_id)
    )
    self.threads[connection_id].daemon = True
    self.threads[connection_id].start()
def _print_not_available_for(self, item):
    """Warn that this agent's HIT is abandoned, so ``item`` is unavailable.

    ``item`` is interpolated into the message as the name of the action
    that could not be performed.
    """
    template = (
        'Conversation ID: {}, Agent ID: {} - HIT '
        'is abandoned and thus not available for '
        '{}.'
    )
    shared_utils.print_and_log(
        logging.WARN,
        template.format(self.conversation_id, self.id, item),
        should_print=True,
    )
def on_disconnect(*args):
    """Disconnect callback: log the event and mark the connection not alive.

    No reconnect is attempted here. ``self`` is resolved from the enclosing
    scope, which lies outside this block.
    """
    dropped_note = 'World server disconnected: {}'.format(args)
    shared_utils.print_and_log(logging.INFO, dropped_note)
    self.alive = False
def block_worker(self, reason='unspecified'):
    """Ask the manager to block this agent's worker, then log a warning.

    ``reason`` is forwarded to the manager and included in the log line.
    """
    self.manager.block_worker(worker_id=self.worker_id, reason=reason)
    blocked_note = 'Blocked worker ID: {}. Reason: {}'.format(
        self.worker_id, reason
    )
    shared_utils.print_and_log(logging.WARN, blocked_note, should_print=True)
def log_reconnect(self):
    """Emit a debug log line recording that this agent reconnected.

    The line includes the worker id, assignment id, conversation id and
    the agent's current ``state.status``.
    """
    reconnect_note = 'Agent ({})_({}) reconnected to {} with status {}'.format(
        self.worker_id,
        self.assignment_id,
        self.conversation_id,
        self.state.status,
    )
    shared_utils.print_and_log(logging.DEBUG, reconnect_note)
def expire_all_unassigned_hits(self):
    """Request expiry of every HIT recorded in ``self.hit_id_list``.

    Per the original author's note, expiry only takes immediate effect for
    HITs that are not currently assigned. The announcement is printed
    unless ``self.is_test`` is set.
    """
    announce = not self.is_test
    shared_utils.print_and_log(
        logging.INFO,
        'Expiring all unassigned HITs...',
        should_print=announce,
    )
    for pending_hit in self.hit_id_list:
        mturk_utils.expire_hit(self.is_sandbox, pending_hit)
def on_message(*args):
    """Incoming message callback: dispatch each entry to message_callback.

    The first positional argument is treated as the message data. For every
    item under its 'entry' key, the first element of that item's
    'messaging' list is handed to ``self.message_callback``. ``self`` is
    resolved from the enclosing scope, which lies outside this block.
    """
    incoming = args[0]
    shared_utils.print_and_log(
        logging.DEBUG,
        'Message data recieved: {}'.format(incoming),
    )
    for entry in incoming['entry']:
        first_event = entry['messaging'][0]
        self.message_callback(first_event)
def _log_missing_agent(self, worker_id, assignment_id):
    """Warn that no agent exists for the given worker/assignment pair.

    Intended for situations where an agent was expected; per the original
    note, frequent occurrences indicate a problem.
    """
    missing_note = (
        'Expected to have an agent for {}_{}, yet none was found'.format(
            worker_id, assignment_id
        )
    )
    shared_utils.print_and_log(logging.WARN, missing_note)
def log_reconnect(self):
    """Emit a debug log line recording that this agent reconnected.

    The line includes the worker id, assignment id, conversation id and
    the value returned by ``self.get_status()``.
    """
    reconnect_note = 'Agent ({})_({}) reconnected to {} with status {}'.format(
        self.worker_id,
        self.assignment_id,
        self.conversation_id,
        self.get_status(),
    )
    shared_utils.print_and_log(logging.DEBUG, reconnect_note)
def prepare_timeout(self):
    """Record a timeout for this agent and build the message for ``act``.

    Logs the timeout, notifies the MTurk manager through
    ``handle_turker_timeout``, and returns the result of
    ``self._get_episode_done_msg(TIMEOUT_MESSAGE)``.
    """
    timeout_note = '{} timed out before sending.'.format(self.id)
    shared_utils.print_and_log(logging.INFO, timeout_note)
    self.mturk_manager.handle_turker_timeout(
        self.worker_id, self.assignment_id
    )
    done_msg = self._get_episode_done_msg(TIMEOUT_MESSAGE)
    return done_msg
def on_disconnect(*args):
    """Disconnect callback: log, close the socket, maybe start the reaper.

    After logging, ``self._ensure_closed()`` is called; unless
    ``self.is_shutdown`` is set, ``self._spawn_reaper_thread()`` follows
    (per the original note, to reap the task in case the server is really
    dead). ``self`` is resolved from the enclosing scope, which lies
    outside this block.
    """
    dropped_note = 'World server disconnected: {}'.format(args)
    shared_utils.print_and_log(logging.INFO, dropped_note)
    self._ensure_closed()
    if self.is_shutdown:
        return
    self._spawn_reaper_thread()
def _safe_put(self, connection_id, item):
    """Put ``item`` on the queue for ``connection_id`` if that queue exists.

    When no queue is registered under ``connection_id`` the item is dropped
    and a warning is logged instead.
    """
    if connection_id not in self.queues:
        shared_utils.print_and_log(
            logging.WARN,
            'Queue {} did not exist to put a message in'.format(
                connection_id
            ),
        )
        return
    target_queue = self.queues[connection_id]
    target_queue.put(item)
def _set_worker_status_to_waiting(self, pkt):
    """Mark the packet's agent as waiting and add it to the worker pool.

    Does nothing when ``self._get_agent_from_pkt(pkt)`` finds no agent. The
    pool append happens while holding ``self.worker_pool_change_condition``.
    """
    agent = self._get_agent_from_pkt(pkt)
    if agent is None:
        return

    agent.state.status = AssignState.STATUS_WAITING

    # Add the worker to pool
    with self.worker_pool_change_condition:
        adding_note = "Adding worker {} to pool...".format(agent.worker_id)
        shared_utils.print_and_log(logging.DEBUG, adding_note)
        self.worker_pool.append(agent)
def channel_thread():
    """Handler thread for monitoring a single channel.

    ``self``, ``connection_id``, ``worker_id`` and ``assignment_id`` are
    resolved from the enclosing scope, which lies outside this block. The
    loop runs while ``self.run[connection_id]`` is truthy and sleeps
    ``shared_utils.THREAD_MEDIUM_SLEEP`` after every pass.
    """
    shared_utils.print_and_log(
        logging.DEBUG,
        'Channel ({}) opened'.format(connection_id))
    # Reset heartbeat bookkeeping for this channel
    self.last_sent_heartbeat_time[connection_id] = 0
    self.pongs_without_heartbeat[connection_id] = 0
    self.last_received_heartbeat[connection_id] = None
    while self.run[connection_id]:
        try:
            # Send a heartbeat if needed
            self._send_needed_heartbeat(connection_id)
            # Check if client is still alive
            if (self.pongs_without_heartbeat[connection_id]
                    > self.missed_pongs):
                self.run[connection_id] = False
                self.socket_dead_callback(worker_id, assignment_id)
                break

            # Make sure the queue still exists
            if connection_id not in self.queues:
                self.run[connection_id] = False
                break
            try:
                # Get first item in the queue, check if can send it yet
                item = self.queues[connection_id].get(block=False)
                t = item[0]
                if time.time() < t:
                    # Put the item back into the queue,
                    # it's not time to pop yet
                    self._safe_put(connection_id, item)
                else:
                    # Try to send the packet
                    packet = item[1]
                    if not packet:
                        # This packet was deleted out from under us
                        continue
                    if packet.status is not Packet.STATUS_ACK:
                        # either need to send initial packet
                        # or resend not-acked packet
                        self._send_packet(packet, connection_id, t)
            except Empty:
                pass
        except Exception as e:
            # Only ordinary errors are logged and survived; SystemExit,
            # KeyboardInterrupt and GeneratorExit are allowed to propagate
            # instead of being swallowed by the loop.
            shared_utils.print_and_log(
                logging.WARN,
                'Unexpected error occurred in socket handling thread: '
                '{}'.format(repr(e)),
                should_print=True,
            )
        finally:
            # Runs on every pass, including the `continue`/`break` paths
            time.sleep(shared_utils.THREAD_MEDIUM_SLEEP)
def create_additional_hits(self, num_hits):
    """Create HITs for ``num_hits`` assignments and record their ids.

    A HIT type is created from ``self.opt`` (assignment duration defaults
    to 30 minutes) and subscribed to ``self.topic_arn``. When
    ``opt['unique_worker']`` equals True, one HIT with ``num_hits``
    assignments is created so a worker can only do the task once;
    otherwise ``num_hits`` single-assignment HITs are created. Every HIT id
    is appended to ``self.hit_id_list``.

    Returns the page URL of the last HIT created, or None if none was.
    """
    shared_utils.print_and_log(
        logging.INFO, 'Creating {} hits...'.format(num_hits)
    )
    hit_type_id = mturk_utils.create_hit_type(
        hit_title=self.opt['hit_title'],
        hit_description='{} (ID: {})'.format(
            self.opt['hit_description'], self.task_group_id
        ),
        hit_keywords=self.opt['hit_keywords'],
        hit_reward=self.opt['reward'],
        # Set to 30 minutes by default
        assignment_duration_in_seconds=self.opt.get(
            'assignment_duration_in_seconds', 30 * 60
        ),
        is_sandbox=self.opt['is_sandbox'],
    )
    mturk_chat_url = '{}/chat_index?task_group_id={}'.format(
        self.server_url, self.task_group_id
    )
    shared_utils.print_and_log(logging.INFO, mturk_chat_url)
    mturk_page_url = None

    mturk_utils.subscribe_to_hits(
        hit_type_id, self.is_sandbox, self.topic_arn
    )

    # The equality comparison with True is kept exactly as written.
    if self.opt['unique_worker'] == True:  # noqa: E712
        # A single HIT carrying every assignment
        hits_to_create, assignments_per_hit = 1, num_hits
    else:
        # One HIT per assignment, so a worker may take several
        hits_to_create, assignments_per_hit = num_hits, 1

    for _ in range(hits_to_create):
        mturk_page_url, hit_id = mturk_utils.create_hit_with_hit_type(
            page_url=mturk_chat_url,
            hit_type_id=hit_type_id,
            num_assignments=assignments_per_hit,
            is_sandbox=self.is_sandbox,
        )
        self.hit_id_list.append(hit_id)
    return mturk_page_url
def _on_socket_dead(self, worker_id, assignment_id):
    """Handle a dead socket for the given worker/assignment.

    Looks up the agent; if there is none, nothing happens. Otherwise the
    agent's state is updated according to its current status and, except
    for agents whose status is DONE, the agent's channel is closed on the
    socket manager. The function always returns None.
    """
    agent = self._get_agent(worker_id, assignment_id)
    if agent is None:
        # This worker never registered, so we don't do anything
        return

    status = agent.state.status
    shared_utils.print_and_log(
        logging.DEBUG,
        'Worker {} disconnected from {} in status {}'.format(
            worker_id, assignment_id, status
        ),
    )

    if status == AssignState.STATUS_NONE:
        # Agent never made it to onboarding, delete
        agent.state.status = AssignState.STATUS_DISCONNECT
        agent.reduce_state()
    elif status == AssignState.STATUS_ONBOARDING:
        # Agent never made it to task pool, the onboarding thread will die
        # and delete the agent if we mark it as a disconnect
        agent.state.status = AssignState.STATUS_DISCONNECT
        agent.disconnected = True
    elif status == AssignState.STATUS_WAITING:
        # agent is in pool, remove from pool and delete
        if agent in self.worker_pool:
            with self.worker_pool_change_condition:
                self.worker_pool.remove(agent)
        agent.state.status = AssignState.STATUS_DISCONNECT
        agent.reduce_state()
    elif status == AssignState.STATUS_IN_TASK:
        self._handle_worker_disconnect(worker_id, assignment_id)
        agent.disconnected = True
    elif status == AssignState.STATUS_DONE:
        # It's okay if a complete assignment socket dies, but wait for the
        # world to clean up the resource
        return
    elif status == AssignState.STATUS_ASSIGNED:
        # mark the agent in the assigned state as disconnected, the task
        # spawn thread is responsible for cleanup
        agent.state.status = AssignState.STATUS_DISCONNECT
        agent.disconnected = True

    channel_key = agent.get_connection_id()
    self.socket_manager.close_channel(channel_key)
def _on_socket_dead(self, worker_id, assignment_id):
    """Handle a dead socket for the given worker/assignment.

    Looks up the agent; if there is none, nothing happens. Otherwise the
    agent's state is updated according to its current status and, except
    for agents whose status is DONE, the agent's channel is closed on the
    socket manager. The function always returns None.
    """
    agent = self._get_agent(worker_id, assignment_id)
    if agent is None:
        # This worker never registered, so we don't do anything
        return

    status = agent.state.status
    shared_utils.print_and_log(
        logging.DEBUG,
        'Worker {} disconnected from {} in status {}'.format(
            worker_id, assignment_id, status
        ),
    )

    if status == AssignState.STATUS_NONE:
        # Agent never made it to onboarding, delete
        agent.state.status = AssignState.STATUS_DISCONNECT
        agent.reduce_state()
    elif status == AssignState.STATUS_ONBOARDING:
        # Agent never made it to task pool, the onboarding thread will die
        # and delete the agent if we mark it as a disconnect
        agent.state.status = AssignState.STATUS_DISCONNECT
        agent.disconnected = True
    elif status == AssignState.STATUS_WAITING:
        # agent is in pool, remove from pool and delete
        if agent in self.worker_pool:
            with self.worker_pool_change_condition:
                self.worker_pool.remove(agent)
        agent.state.status = AssignState.STATUS_DISCONNECT
        agent.reduce_state()
    elif status == AssignState.STATUS_IN_TASK:
        self._handle_worker_disconnect(worker_id, assignment_id)
        agent.disconnected = True
    elif status == AssignState.STATUS_DONE:
        # It's okay if a complete assignment socket dies, but wait for the
        # world to clean up the resource
        return
    elif status == AssignState.STATUS_ASSIGNED:
        # mark the agent in the assigned state as disconnected, the task
        # spawn thread is responsible for cleanup
        agent.state.status = AssignState.STATUS_DISCONNECT
        agent.disconnected = True

    channel_key = agent.get_connection_id()
    self.socket_manager.close_channel(channel_key)
def create_persona(self, name, image_url):
    """Create a persona via the Graph API personas endpoint.

    POSTs ``name`` and ``image_url`` (as 'profile_picture_url') with
    ``self.auth_args`` as query parameters.

    Returns the decoded JSON response as-is; the persona id is not
    extracted here.
    """
    api_address = 'https://graph.facebook.com/me/personas'
    persona_spec = {'name': name, "profile_picture_url": image_url}
    reply = requests.post(
        api_address, params=self.auth_args, json=persona_spec
    )
    decoded = reply.json()
    shared_utils.print_and_log(
        logging.INFO,
        '"Facebook response from create persona: {}"'.format(decoded),
    )
    return decoded
def _safe_put(self, connection_id, item):
    """Put ``item`` on the queue for ``connection_id`` if that queue exists.

    When no queue is registered under ``connection_id``, the object at
    ``item[1]`` has its status set to ``Packet.STATUS_FAIL`` and a warning
    is logged; the item is not queued.
    """
    if connection_id not in self.queues:
        item[1].status = Packet.STATUS_FAIL
        shared_utils.print_and_log(
            logging.WARN,
            'Queue {} did not exist to put a message in'.format(
                connection_id
            ),
        )
        return
    target_queue = self.queues[connection_id]
    target_queue.put(item)
def close_channel(self, connection_id):
    """Close the channel for ``connection_id`` and drop its resources.

    The run flag is always cleared. If a queue exists for the channel,
    every packet in ``self.packet_map`` whose receiver connection id
    matches is removed, followed by the channel's queue and thread entries.
    """
    print_and_log('Closing channel {}'.format(connection_id), False)
    self.run[connection_id] = False
    if connection_id not in self.queues:
        return

    # Clean up packets addressed to this channel (iterate over a snapshot
    # so the map can be modified afterwards)
    stale_ids = [
        pkt_id
        for pkt_id, pkt in list(self.packet_map.items())
        if connection_id == pkt.get_receiver_connection_id()
    ]
    for pkt_id in stale_ids:
        del self.packet_map[pkt_id]

    # Clean up other resources
    del self.queues[connection_id]
    del self.threads[connection_id]
def reject_work(self, reason='unspecified'):
    """Reject this agent's submitted work, if the assignment is done.

    An abandoned HIT only produces the not-available notice. Otherwise the
    work is rejected through the manager when the assignment's work status
    equals ``self.ASSIGNMENT_DONE``; if not, a message explains why the
    rejection was skipped.
    """
    if self.hit_is_abandoned:
        self._print_not_available_for('review')
        return

    work_status = self.manager.get_agent_work_status(self.assignment_id)
    if work_status == self.ASSIGNMENT_DONE:
        self.manager.reject_work(self.assignment_id, reason)
        print_and_log(
            'Conversation ID: {}, Agent ID: {} - HIT is '
            'rejected.'.format(self.conversation_id, self.id)
        )
        return

    print_and_log(
        'Cannot reject HIT. Reason: Turker hasn\'t '
        'completed the HIT yet.'
    )
def open_channel(self, worker_id, assignment_id):
    """Register a channel for a worker/assignment unless already open.

    The channel key is ``'<worker_id>_<assignment_id>'``. When a queue
    exists for the key and its run flag is set, the call only logs and
    returns. Otherwise the run flag is set, a new PriorityQueue is stored,
    and the (worker_id, assignment_id) pair is recorded under the key.
    """
    channel_key = '{}_{}'.format(worker_id, assignment_id)
    already_open = channel_key in self.queues and self.run[channel_key]
    if already_open:
        shared_utils.print_and_log(
            logging.DEBUG,
            'Channel ({}) already open'.format(channel_key),
        )
        return

    self.run[channel_key] = True
    self.queues[channel_key] = PriorityQueue()
    self.worker_assign_ids[channel_key] = (worker_id, assignment_id)
def on_message(*args):
    """Incoming message handler for ACKs, ALIVEs, HEARTBEATs,
    PONGs, and MESSAGEs.

    The second positional argument is parsed as JSON. A 'conn_success'
    message only marks the connection alive; anything else is turned into
    a Packet from its 'content' and dispatched on the packet type.
    ``self`` is resolved from the enclosing scope, which lies outside this
    block.
    """
    packet_dict = json.loads(args[1])
    if packet_dict['type'] == 'conn_success':
        self.alive = True
        return  # No action for successful connection
    packet = Packet.from_dict(packet_dict['content'])
    if packet is None:
        # from_dict produced no packet, so there is nothing to dispatch
        return
    packet_id = packet.id
    packet_type = packet.type
    connection_id = packet.get_sender_connection_id()
    if packet_type == Packet.TYPE_ACK:
        if packet_id not in self.packet_map:
            # Don't do anything when acking a packet we don't have
            return
        # Acknowledgements should mark a packet as acknowledged
        shared_utils.print_and_log(logging.DEBUG,
                                   'On new ack: {}'.format(args))
        self.packet_map[packet_id].status = Packet.STATUS_ACK
        # If the packet sender wanted to do something on acknowledge
        if self.packet_map[packet_id].ack_func:
            self.packet_map[packet_id].ack_func(packet)
        # clear the stored packet data for memory reasons
        # (the map is looked up again on purpose: ack_func may have
        # changed or removed the entry)
        try:
            self.packet_map[packet_id].data = None
        except Exception:
            pass  # state already reduced, perhaps by ack_func
    elif packet_type == Packet.TYPE_HEARTBEAT:
        # Heartbeats update the last heartbeat, clears pongs w/o beat
        self.last_received_heartbeat[connection_id] = packet
        self.pongs_without_heartbeat[connection_id] = 0
    elif packet_type == Packet.TYPE_PONG:
        # Message in response from the router, ensuring we're connected
        # to it. Redundant but useful for metering from web client.
        pong_connection_id = packet.get_receiver_connection_id()
        # Pongs are only counted once a heartbeat has been received on
        # this connection
        if self.last_received_heartbeat[
                pong_connection_id] is not None:
            self.pongs_without_heartbeat[pong_connection_id] += 1
    else:
        # Remaining packet types need to be acknowledged
        shared_utils.print_and_log(logging.DEBUG,
                                   'On new message: {}'.format(args))
        self._send_ack(packet)
        # Call the appropriate callback; other types are acked only
        if packet_type == Packet.TYPE_ALIVE:
            self.alive_callback(packet)
        elif packet_type == Packet.TYPE_MESSAGE:
            self.message_callback(packet)
def approve_work(self):
    """Approve this agent's submitted work, if the assignment is done.

    An abandoned HIT only produces the not-available notice. Otherwise the
    work is approved through the manager when the assignment's work status
    equals ``self.ASSIGNMENT_DONE``; if not, a message explains why the
    approval was skipped.
    """
    if self.hit_is_abandoned:
        self._print_not_available_for('review')
        return

    work_status = self.manager.get_agent_work_status(self.assignment_id)
    if work_status == self.ASSIGNMENT_DONE:
        self.manager.approve_work(assignment_id=self.assignment_id)
        print_and_log(
            'Conversation ID: {}, Agent ID: {} - HIT is '
            'approved.'.format(self.conversation_id, self.id)
        )
        return

    print_and_log(
        'Cannot approve HIT. Reason: Turker hasn\'t '
        'completed the HIT yet.'
    )
def upload_fb_attachment(self, payload):
    """
    Uploads an attachment using the Attachment Upload API and returns the
    decoded JSON response.

    ``payload`` must contain 'type' (one of 'image', 'video', 'file',
    'audio') and either:

    * 'url' - the attachment is uploaded by reference, or
    * 'filename' plus 'format' - the local file is opened in binary mode
      and sent as multipart form data with MIME type '<type>/<format>'.

    When both 'url' and 'filename' are present, 'url' wins.

    Raises:
        AssertionError: if the attachment type is unsupported.
        ValueError: if the payload has neither 'url' nor 'filename'.
    """
    api_address = 'https://graph.facebook.com/v2.6/me/message_attachments'
    assert payload['type'] in [
        'image',
        'video',
        'file',
        'audio',
    ], 'unsupported attachment type'
    # Fail with a clear error before any request is made; previously this
    # case fell through and hit an unbound `response` variable.
    if 'url' not in payload and 'filename' not in payload:
        raise ValueError(
            "attachment payload must contain either 'url' or 'filename'"
        )
    if 'url' in payload:
        message = {
            "message": {
                "attachment": {
                    "type": payload['type'],
                    "payload": {"is_reusable": "true", "url": payload['url']},
                }
            }
        }
        response = requests.post(
            api_address, params=self.auth_args, json=message
        )
    else:
        message = {
            "attachment": {
                "type": payload['type'],
                "payload": {"is_reusable": "true"},
            }
        }
        # The file must stay open while the request streams it
        with open(payload['filename'], 'rb') as f:
            filedata = {
                "filedata": (
                    payload['filename'],
                    f,
                    payload['type'] + '/' + payload['format'],
                )
            }
            response = requests.post(
                api_address,
                params=self.auth_args,
                data={"message": json.dumps(message)},
                files=filedata,
            )
    result = response.json()
    shared_utils.print_and_log(
        logging.INFO,
        '"Facebook response from attachment upload: {}"'.format(result),
    )
    return result
def on_message(*args):
    """Incoming message callback for traffic relayed from the FB user.

    The second positional argument is parsed as JSON. 'conn_success' marks
    the connection alive and 'pong' records the current time in
    ``self.last_pong``; both return immediately. For anything else, each
    item under content['entry'] has the first element of its 'messaging'
    list passed to ``self.message_callback``. ``self`` is resolved from the
    enclosing scope, which lies outside this block.
    """
    envelope = json.loads(args[1])
    kind = envelope['type']
    if kind == 'conn_success':
        self.alive = True
        return  # No action for successful connection
    if kind == 'pong':
        self.last_pong = time.time()
        return  # No further action for pongs

    content = envelope['content']
    shared_utils.print_and_log(
        logging.DEBUG,
        'Message data recieved: {}'.format(content),
    )
    for entry in content['entry']:
        first_event = entry['messaging'][0]
        self.message_callback(first_event)
def reject_work(self, reason='unspecified'):
    """Reject this agent's submitted work, if the assignment is done.

    An abandoned HIT only produces the not-available notice. Otherwise the
    work is rejected through ``self.mturk_manager`` when the assignment's
    work status equals ``self.ASSIGNMENT_DONE``; if not, a warning is
    logged and nothing is rejected.
    """
    if self.hit_is_abandoned:
        self._print_not_available_for('review')
        return

    work_status = self.mturk_manager.get_agent_work_status(
        self.assignment_id
    )
    if work_status == self.ASSIGNMENT_DONE:
        self.mturk_manager.reject_work(self.assignment_id, reason)
        shared_utils.print_and_log(
            logging.INFO,
            'Conversation ID: {}, Agent ID: {} - HIT is '
            'rejected.'.format(self.conversation_id, self.id),
        )
        return

    shared_utils.print_and_log(
        logging.WARN,
        'Cannot reject HIT. Turker hasn\'t completed the HIT yet.',
    )
def email_worker(self, subject, message_text):
    """Email this agent's worker through the manager and report the outcome.

    Returns True when the manager's response contains the key 'success' and
    False when it contains 'failure'. If neither key is present the function
    falls through and implicitly returns None.
    """
    outcome = self.manager.email_worker(
        worker_id=self.worker_id,
        subject=subject,
        message_text=message_text,
    )
    if 'success' in outcome:
        sent_note = 'Email sent to worker ID: {}: Subject: {}: Text: {}'.format(
            self.worker_id, subject, message_text
        )
        print_and_log(sent_note)
        return True
    if 'failure' in outcome:
        failed_note = "Unable to send email to worker ID: {}. Error: {}".format(
            self.worker_id, outcome['failure']
        )
        print_and_log(failed_note)
        return False
def channel_thread(self):
    """
    Handler thread for monitoring all channels to send things to.

    Until ``self.is_shutdown`` is set, each pass walks a snapshot of
    ``self.run`` and, for every channel whose run flag is set, tries to
    send at most one due packet from that channel's queue. A channel with
    a blocking packet that ``self.packet_should_block`` still reports as
    blocking is skipped for the pass. The thread sleeps
    ``shared_utils.THREAD_SHORT_SLEEP`` between passes.
    """
    # while the thread is still alive
    while not self.is_shutdown:
        # Iterate a copy so channels may be added/removed concurrently
        for connection_id in self.run.copy():
            if not self.run[connection_id]:
                continue
            try:
                # Make sure the queue still exists
                if connection_id not in self.queues:
                    self.run[connection_id] = False
                    # Only this channel is gone; keep servicing the rest
                    # of the channels in this pass.
                    continue
                if self.blocking_packets.get(connection_id) is not None:
                    packet_item = self.blocking_packets[connection_id]
                    if not self.packet_should_block(packet_item):
                        self.blocking_packets[connection_id] = None
                    else:
                        continue
                try:
                    # Get first item in the queue, check if can send it yet
                    item = self.queues[connection_id].get(block=False)
                    t = item[0]
                    if time.time() < t:
                        # Put the item back into the queue,
                        # it's not time to pop yet
                        self._safe_put(connection_id, item)
                    else:
                        # Try to send the packet
                        packet = item[1]
                        if not packet:
                            # This packet was deleted out from under us
                            continue
                        if packet.status is not Packet.STATUS_ACK:
                            # either need to send initial packet
                            # or resend not-acked packet
                            self._send_packet(packet, connection_id, t)
                except Empty:
                    pass
            except Exception as e:
                shared_utils.print_and_log(
                    logging.WARN,
                    'Unexpected error occurred in socket handling thread: '
                    '{}'.format(repr(e)),
                    should_print=True,
                )
        time.sleep(shared_utils.THREAD_SHORT_SLEEP)
def queue_packet(self, packet):
    """Queue ``packet`` for delivery on its receiver's channel.

    If ``self.socket_is_open`` reports no open socket for the receiver's
    connection id, a message is logged and the packet is dropped.
    Otherwise the packet is recorded in ``self.packet_map`` under its id
    and placed on the channel's queue with the current time as priority.
    """
    channel_key = packet.get_receiver_connection_id()
    if not self.socket_is_open(channel_key):
        # Warn if there is no socket to send through for the expected recip
        print_and_log(
            'Can not send packet to worker_id {}: packet queue not found. '
            'Message: {}'.format(channel_key, packet.data)
        )
        return

    queued_note = 'Put packet ({}) in queue ({})'.format(
        packet.id, channel_key
    )
    print_and_log(queued_note, False)

    self.packet_map[packet.id] = packet
    # The current time is the packet's priority in the queue
    entry = (time.time(), packet)
    self._safe_put(channel_key, entry)
def _handle_bad_disconnect(self, worker_id):
    """Count a bad disconnect for ``worker_id`` and block repeat offenders.

    The worker's disconnect counter is incremented and a timestamped
    record is appended to ``self.disconnects``. Once the counter exceeds
    ``MAX_DISCONNECTS`` the worker is blocked with an explanatory message
    and the block is logged.
    """
    state = self.worker_state[worker_id]
    state.disconnects += 1
    self.disconnects.append({'time': time.time(), 'id': worker_id})

    if state.disconnects > MAX_DISCONNECTS:
        block_reason = (
            'This worker has repeatedly disconnected from these tasks,'
            ' which require constant connection to complete properly '
            'as they involve interaction with other Turkers. They have'
            ' been blocked to ensure a better experience for other '
            'workers who don\'t disconnect.'
        )
        self.block_worker(worker_id, block_reason)
        blocked_note = 'Worker {} was blocked - too many disconnects'.format(
            worker_id
        )
        print_and_log(blocked_note)
def _get_connection(self):
    """Return the shared sqlite3 connection, opening it on first use.

    The connection is cached on ``self.conn`` and configured with
    ``sqlite3.Row`` as its row factory. A ``sqlite3.Error`` raised while
    opening is logged and re-raised.
    """
    if self.conn is not None:
        return self.conn

    try:
        fresh = sqlite3.connect(self.db_path)
        fresh.row_factory = sqlite3.Row
        self.conn = fresh
    except sqlite3.Error as e:
        failure_note = "Could not get db connection, failing: {}".format(
            repr(e)
        )
        shared_utils.print_and_log(
            logging.ERROR, failure_note, should_print=True
        )
        raise e
    return self.conn
def open_channel(self, worker_id, assignment_id):
    """Register a channel for a worker/assignment unless already open.

    The channel key is ``'<worker_id>_<assignment_id>'``. When a queue
    exists for the key and its run flag is set, the call only logs and
    returns. Otherwise the run flag is set, a new PriorityQueue is stored,
    the heartbeat bookkeeping for the key is reset, and the
    (worker_id, assignment_id) pair is recorded under the key.
    """
    channel_key = '{}_{}'.format(worker_id, assignment_id)
    already_open = channel_key in self.queues and self.run[channel_key]
    if already_open:
        shared_utils.print_and_log(
            logging.DEBUG,
            'Channel ({}) already open'.format(channel_key),
        )
        return

    self.run[channel_key] = True
    self.queues[channel_key] = PriorityQueue()
    # Fresh heartbeat state for the new channel
    self.last_sent_heartbeat_time[channel_key] = 0
    self.pongs_without_heartbeat[channel_key] = 0
    self.last_received_heartbeat[channel_key] = None
    self.worker_assign_ids[channel_key] = (worker_id, assignment_id)
def send_fb_payload(self, receiver_id, payload, quick_replies=None,
                    persona_id=None):
    """Sends a payload to messenger, processes it if we can.

    The attachment is built from ``payload``:

    * type 'list' - ``create_compact_list_message(payload['data'])``
    * type 'image'/'video'/'file'/'audio' - ``create_attachment(payload)``
    * anything else - ``payload['data']`` is sent unchanged

    ``quick_replies``, when given, is mapped through
    ``create_reply_option`` and attached to the message. ``persona_id``,
    when given, is set as a top-level field of the request body; the
    caller's ``payload`` dict is not modified.

    If the response reports error code 1200 the request is retried once.
    Returns the decoded JSON response of the last request made.
    """
    api_address = 'https://graph.facebook.com/v2.6/me/messages'
    if payload['type'] == 'list':
        data = create_compact_list_message(payload['data'])
    elif payload['type'] in ['image', 'video', 'file', 'audio']:
        data = create_attachment(payload)
    else:
        data = payload['data']

    message = {
        "messaging_type": 'RESPONSE',
        "recipient": {
            "id": receiver_id
        },
        "message": {
            "attachment": data,
        }
    }
    if quick_replies is not None:
        quick_replies = [create_reply_option(x, x) for x in quick_replies]
        message['message']['quick_replies'] = quick_replies
    if persona_id is not None:
        # Must go on the outgoing request body. It used to be written into
        # `payload`, which is never sent, so the persona was dropped.
        message['persona_id'] = persona_id

    response = requests.post(
        api_address,
        params=self.auth_args,
        json=message,
    )
    result = response.json()
    if 'error' in result:
        if result['error']['code'] == 1200:
            # temporary error please retry
            response = requests.post(
                api_address,
                params=self.auth_args,
                json=message,
            )
            result = response.json()
    shared_utils.print_and_log(
        logging.INFO,
        '"Facebook response from message send: {}"'.format(result))
    return result
def _get_connection(self):
    """Return the calling thread's sqlite3 connection, opening it if needed.

    Connections are cached in ``self.conn`` keyed by
    ``threading.get_ident()``. A missing or None entry triggers a new
    connection with ``sqlite3.Row`` as its row factory. A ``sqlite3.Error``
    raised while opening is logged and re-raised.
    """
    thread_key = threading.get_ident()
    if thread_key in self.conn and self.conn[thread_key] is not None:
        return self.conn[thread_key]

    try:
        fresh = sqlite3.connect(self.db_path)
        fresh.row_factory = sqlite3.Row
        self.conn[thread_key] = fresh
    except sqlite3.Error as e:
        failure_note = "Could not get db connection, failing: {}".format(
            repr(e)
        )
        shared_utils.print_and_log(
            logging.ERROR, failure_note, should_print=True
        )
        raise e
    return self.conn[thread_key]
def approve_work(self):
    """Approve this agent's submitted work, if the assignment is done.

    An abandoned HIT only produces the not-available notice. Otherwise the
    work is approved through ``self.mturk_manager`` when the assignment's
    work status equals ``self.ASSIGNMENT_DONE``; if not, a warning is
    logged and nothing is approved.
    """
    if self.hit_is_abandoned:
        self._print_not_available_for('review')
        return

    work_status = self.mturk_manager.get_agent_work_status(
        self.assignment_id
    )
    if work_status == self.ASSIGNMENT_DONE:
        self.mturk_manager.approve_work(assignment_id=self.assignment_id)
        shared_utils.print_and_log(
            logging.INFO,
            'Conversation ID: {}, Agent ID: {} - HIT is '
            'approved.'.format(self.conversation_id, self.id),
        )
        return

    shared_utils.print_and_log(
        logging.WARN,
        'Cannot approve HIT. Turker hasn\'t completed the HIT yet.',
    )
def close_channel(self, connection_id):
    """Close the channel for ``connection_id`` and drop its resources.

    The run flag is always cleared. If a queue exists for the channel,
    every packet in ``self.packet_map`` whose receiver connection id
    matches is removed, followed by the channel's queue and thread entries.
    """
    closing_note = 'Closing channel {}'.format(connection_id)
    shared_utils.print_and_log(logging.DEBUG, closing_note)
    self.run[connection_id] = False
    if connection_id not in self.queues:
        return

    # Clean up packets addressed to this channel (iterate over a snapshot
    # so the map can be modified afterwards)
    stale_ids = [
        pkt_id
        for pkt_id, pkt in list(self.packet_map.items())
        if connection_id == pkt.get_receiver_connection_id()
    ]
    for pkt_id in stale_ids:
        del self.packet_map[pkt_id]

    # Clean up other resources
    del self.queues[connection_id]
    del self.threads[connection_id]
def on_error(ws, error):
    """Socket error callback: log the error and close the socket if needed.

    Control flow here is exception-driven. Accessing ``error.errno`` fails
    for errors without that attribute, which lands in the ``except``
    branch. The exception raised on a refused connection is caught by the
    same ``except`` branch, so it does not propagate to the caller.
    ``self`` is resolved from the enclosing scope, which lies outside this
    block.
    """
    try:
        if error.errno == errno.ECONNREFUSED:
            ws.close()
            self.use_socket = False
            # Caught by the handler below, which logs and closes again
            raise Exception("Socket refused connection, cancelling")
        else:
            shared_utils.print_and_log(
                logging.WARN,
                'Socket logged error: {}'.format(error),
            )
    except BaseException:
        # Reached when `error` has no errno, and also for the exception
        # raised above
        shared_utils.print_and_log(
            logging.WARN,
            'Socket logged string error: {} Restarting'.format(error),
        )
        ws.close()
def route_packet(self, pkt):
    """Deliver an incoming packet to the agent it was sent by.

    The agent is looked up from the packet's sender id and assignment id;
    when none exists the packet is ignored. Otherwise the packet data is
    appended to the agent's message history (for resending on reconnect),
    the agent's last command is cleared, and the data is handed to the
    agent via ``put_data``.
    """
    agent = self._get_agent(pkt.sender_id, pkt.assignment_id)
    if agent is None:
        return

    shared_utils.print_and_log(
        logging.INFO,
        'Manager received: {}'.format(pkt),
        should_print=self.opt['verbose'],
    )
    # Keep the message so it can be sent again on a reconnect
    agent.append_message(pkt.data)
    # A message arrived, so the pending send-message command is cleared
    agent.set_last_command(None)
    agent.put_data(pkt.id, pkt.data)
def pay_bonus(self, bonus_amount, reason='unspecified'):
    """Pay this agent's worker a bonus, if the assignment allows it.

    An abandoned HIT only produces the not-available notice. The bonus is
    paid through the manager, with a fresh uuid4 request token, when the
    assignment's work status is ``ASSIGNMENT_DONE`` or
    ``ASSIGNMENT_APPROVED``; otherwise a message explains why it was
    skipped.
    """
    if self.hit_is_abandoned:
        self._print_not_available_for('bonus')
        return

    work_status = self.manager.get_agent_work_status(self.assignment_id)
    if work_status in (self.ASSIGNMENT_DONE, self.ASSIGNMENT_APPROVED):
        request_token = str(uuid.uuid4())
        self.manager.pay_bonus(
            worker_id=self.worker_id,
            bonus_amount=bonus_amount,
            assignment_id=self.assignment_id,
            reason=reason,
            unique_request_token=request_token,
        )
        return

    print_and_log(
        'Cannot pay bonus for HIT. Reason: Turker '
        'hasn\'t completed the HIT yet.'
    )
def _send_packet(self, packet, connection_id, send_time): """Sends a packet, blocks if the packet is blocking""" # Send the packet pkt = packet.as_dict() if pkt['data'] is None: return # This packet was _just_ acked. shared_utils.print_and_log(logging.DEBUG, 'Send packet: {}'.format(packet)) result = self._safe_send( json.dumps({ 'type': data_model.SOCKET_ROUTE_PACKET_STRING, 'content': pkt, })) if not result: # The channel died mid-send, wait for it to come back up self._safe_put(connection_id, (send_time, packet)) return if packet.status != Packet.STATUS_ACK: packet.status = Packet.STATUS_SENT # Handles acks and blocking if packet.requires_ack: if packet.blocking: # blocking till ack is received or timeout start_t = time.time() while True: if packet.status == Packet.STATUS_ACK: # Clear the data to save memory as we no longer need it packet.data = None break if packet.status == Packet.STATUS_FAIL: # Failed packets shouldn't be re-queued as they errored break if time.time() - start_t > self.ACK_TIME[packet.type]: # didn't receive ACK, resend packet keep old queue time # to ensure this packet is processed first packet.status = Packet.STATUS_INIT self._safe_put(connection_id, (send_time, packet)) break time.sleep(shared_utils.THREAD_SHORT_SLEEP) else: # non-blocking ack: add ack-check to queue t = time.time() + self.ACK_TIME[packet.type] self._safe_put(connection_id, (t, packet))
def create_additional_hits(self, num_hits):
    """Create HITs/assignments for ``num_hits`` more workers.

    A HIT type is created from the task options first. When the
    ``unique_worker`` option is set, a single HIT with ``num_hits``
    assignments is created; otherwise ``num_hits`` separate HITs with one
    assignment each are created. Every created HIT id is appended to
    ``self.hit_id_list``.

    Returns the page URL given back by the last HIT creation call, or None
    when no HIT was created.
    """
    shared_utils.print_and_log(logging.INFO,
                               'Creating {} hits...'.format(num_hits))

    hit_type_id = mturk_utils.create_hit_type(
        hit_title=self.opt['hit_title'],
        hit_description='{} (ID: {})'.format(self.opt['hit_description'],
                                             self.task_group_id),
        hit_keywords=self.opt['hit_keywords'],
        hit_reward=self.opt['reward'],
        # Set to 30 minutes by default
        assignment_duration_in_seconds=self.opt.get(
            'assignment_duration_in_seconds', 30 * 60),
        is_sandbox=self.opt['is_sandbox']
    )
    mturk_chat_url = '{}/chat_index?task_group_id={}'.format(
        self.server_url,
        self.task_group_id
    )
    shared_utils.print_and_log(logging.INFO, mturk_chat_url)

    mturk_page_url = None
    # Plain truthiness test instead of comparing the flag with `== True`
    if self.opt['unique_worker']:
        # Use a single hit with many assignments to allow
        # workers to only work on the task once
        mturk_page_url, hit_id = mturk_utils.create_hit_with_hit_type(
            page_url=mturk_chat_url,
            hit_type_id=hit_type_id,
            num_assignments=num_hits,
            is_sandbox=self.is_sandbox
        )
        self.hit_id_list.append(hit_id)
    else:
        # Create unique hits, allowing one worker to be able to handle many
        # tasks without needing to be unique
        for _ in range(num_hits):
            mturk_page_url, hit_id = mturk_utils.create_hit_with_hit_type(
                page_url=mturk_chat_url,
                hit_type_id=hit_type_id,
                num_assignments=1,
                is_sandbox=self.is_sandbox
            )
            self.hit_id_list.append(hit_id)
    return mturk_page_url
def on_error(ws, error):
    """Socket error callback.

    ws: the socket the error was reported for (not used here)
    error: the reported error object

    A connection-closed error is ignored. An error whose ``errno`` is
    ECONNREFUSED closes the socket and turns ``use_socket`` off. Errors
    with any other ``errno`` are only logged. Errors without an ``errno``
    attribute are logged and the socket is closed.
    """
    try:
        if error.errno == errno.ECONNREFUSED:
            self._ensure_closed()
            self.use_socket = False
            # NOTE(review): this exception is caught by the handler just
            # below, so it never leaves this callback; the refusal ends up
            # logged as a "Restarting" warning. Behavior kept as it was -
            # confirm whether it was meant to propagate.
            raise Exception("Socket refused connection, cancelling")
        shared_utils.print_and_log(
            logging.WARN,
            'Socket logged error: {}'.format(repr(error)))
    except Exception:
        # Only ordinary exceptions are handled here (e.g. the
        # AttributeError raised when `error` has no errno), so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        if isinstance(error, websocket.WebSocketConnectionClosedException):
            return  # Connection closed is noop
        shared_utils.print_and_log(
            logging.WARN,
            'Socket logged error: {} Restarting'.format(repr(error)),
        )
        self._ensure_closed()
def reject_work(self, reason='unspecified'):
    """Reject this agent's submitted work through the manager.

    reason: text passed on to the manager's ``reject_work``

    The rejection only happens when the HIT was not abandoned and the
    assignment's work status equals ``ASSIGNMENT_DONE``; otherwise the
    situation is only reported.
    """
    if self.hit_is_abandoned:
        self._print_not_available_for('review')
        return

    status = self.manager.get_agent_work_status(self.assignment_id)
    if status == self.ASSIGNMENT_DONE:
        self.manager.reject_work(self.assignment_id, reason)
        level = logging.INFO
        message = (
            'Conversation ID: {}, Agent ID: {} - HIT is '
            'rejected.'.format(self.conversation_id, self.id)
        )
    else:
        level = logging.WARN
        message = 'Cannot reject HIT. Turker hasn\'t completed the HIT yet.'
    shared_utils.print_and_log(level, message)
def approve_work(self):
    """Approve this agent's submitted work through the manager.

    The approval only happens when the HIT was not abandoned and the
    assignment's work status equals ``ASSIGNMENT_DONE``; otherwise the
    situation is only reported.
    """
    if self.hit_is_abandoned:
        self._print_not_available_for('review')
        return

    status = self.manager.get_agent_work_status(self.assignment_id)
    if status == self.ASSIGNMENT_DONE:
        self.manager.approve_work(assignment_id=self.assignment_id)
        level = logging.INFO
        message = (
            'Conversation ID: {}, Agent ID: {} - HIT is '
            'approved.'.format(self.conversation_id, self.id)
        )
    else:
        level = logging.WARN
        message = 'Cannot approve HIT. Turker hasn\'t completed the HIT yet.'
    shared_utils.print_and_log(level, message)
def prepare_timeout(self):
    """Record that this agent timed out and build the act to return.

    The timeout is logged and reported to the manager, then a message dict
    carrying the timeout text and ``episode_done`` set to True is returned
    for the act call.
    """
    timeout_notice = '{} timed out before sending.'.format(self.id)
    shared_utils.print_and_log(logging.INFO, timeout_notice)
    self.manager.handle_turker_timeout(self.worker_id, self.assignment_id)
    return {
        'id': self.id,
        'text': TIMEOUT_MESSAGE,
        'episode_done': True,
    }
def queue_packet(self, packet):
    """Queue a packet to be sent to its receiver.

    The packet is stored in ``packet_map`` under its id and placed in the
    receiver connection's queue with the current time as its priority.
    When no socket is open for that connection, the packet is dropped and
    a warning is logged instead.
    """
    connection_id = packet.get_receiver_connection_id()
    if self.socket_is_open(connection_id):
        shared_utils.print_and_log(
            logging.DEBUG,
            'Put packet ({}) in queue ({})'.format(packet.id, connection_id)
        )
        self.packet_map[packet.id] = packet
        # The current time is the packet's priority in the queue
        self._safe_put(connection_id, (time.time(), packet))
    else:
        # Warn if there is no socket to send through for the expected recip
        shared_utils.print_and_log(
            logging.WARN,
            'Can not send packet to worker_id {}: packet queue not found. '
            'Message: {}'.format(connection_id, packet.data)
        )
def pay_bonus(self, bonus_amount, reason='unspecified'):
    """Pay this agent a bonus through the manager.

    bonus_amount: amount forwarded to the manager's ``pay_bonus``
    reason: text forwarded to the manager as the reason for the bonus

    Nothing is paid when the HIT was abandoned, or when the assignment's
    work status is neither ``ASSIGNMENT_DONE`` nor ``ASSIGNMENT_APPROVED``;
    both cases are only reported.
    """
    if self.hit_is_abandoned:
        self._print_not_available_for('bonus')
        return

    status = self.manager.get_agent_work_status(self.assignment_id)
    if status not in (self.ASSIGNMENT_DONE, self.ASSIGNMENT_APPROVED):
        shared_utils.print_and_log(
            logging.WARN,
            'Cannot pay bonus for HIT. Reason: Turker '
            'hasn\'t completed the HIT yet.'
        )
        return

    # A fresh token is generated for every bonus request
    request_token = str(uuid.uuid4())
    self.manager.pay_bonus(
        worker_id=self.worker_id,
        bonus_amount=bonus_amount,
        assignment_id=self.assignment_id,
        reason=reason,
        unique_request_token=request_token,
    )
def _on_new_message(self, pkt):
    """Deliver an incoming message packet to the agent that sent it.

    The agent is looked up from the packet's ``sender_id`` and
    ``assignment_id``. A missing agent is reported through
    ``_log_missing_agent``; an agent whose state is final gets nothing.
    Otherwise the packet data is recorded on the agent's state and put on
    its message queue.
    """
    sender_id = pkt.sender_id
    assignment = pkt.assignment_id
    agent = self._get_agent(sender_id, assignment)
    if agent is None:
        self._log_missing_agent(sender_id, assignment)
        return
    if agent.state.is_final():
        return

    shared_utils.print_and_log(
        logging.INFO,
        'Manager received: {}'.format(pkt),
        should_print=self.opt['verbose']
    )
    state = agent.state
    # Push the message to the message thread to send on a reconnect
    state.messages.append(pkt.data)
    # Clear the send message command, as a message was received
    state.last_command = None
    # TODO ensure you can't duplicate a message push here
    agent.msg_queue.put(pkt.data)
def on_message(*args):
    """Handle an incoming ACK, HEARTBEAT, ALIVE or MESSAGE packet.

    The packet is built from the first positional argument.
    - ACK: the stored packet with the same id is marked acknowledged, its
      ack callback (if any) is run, and its data is cleared. Acks for
      unknown packet ids are ignored.
    - HEARTBEAT: the sender's last heartbeat time is updated and a
      heartbeat is sent back.
    - anything else: the packet is acked, then ALIVE and MESSAGE packets
      are passed on to their callbacks.
    """
    incoming = Packet.from_dict(args[0])
    incoming_id = incoming.id
    kind = incoming.type
    sender_connection = incoming.get_sender_connection_id()

    if kind == Packet.TYPE_ACK:
        if incoming_id not in self.packet_map:
            # Don't do anything when acking a packet we don't have
            return
        shared_utils.print_and_log(
            logging.DEBUG,
            'On new ack: {}'.format(args)
        )
        # Acknowledgements should mark a packet as acknowledged
        acked = self.packet_map[incoming_id]
        acked.status = Packet.STATUS_ACK
        # If the packet sender wanted to do something on acknowledge
        if acked.ack_func:
            acked.ack_func(incoming)
        # clear the stored packet data for memory reasons
        acked.data = None
        return

    if kind == Packet.TYPE_HEARTBEAT:
        # Heartbeats update the last heartbeat time and respond in kind
        self.last_heartbeat[sender_connection] = time.time()
        self._send_response_heartbeat(incoming)
        return

    # Remaining packet types need to be acknowledged
    shared_utils.print_and_log(
        logging.DEBUG,
        'On new message: {}'.format(args)
    )
    self._send_ack(incoming)
    # Call the appropriate callback
    if kind == Packet.TYPE_ALIVE:
        self.last_heartbeat[sender_connection] = time.time()
        self.alive_callback(incoming)
    elif kind == Packet.TYPE_MESSAGE:
        self.message_callback(incoming)
def _task_function(opt, workers, conversation_id):
    """Run the task for a conversation once all of its workers are in it.

    opt: options passed through to ``task_function``
    workers: the workers assigned to this conversation
    conversation_id: identifier used in the log messages

    Waits until every worker's state status is ``STATUS_IN_TASK``. If that
    takes longer than ``WORLD_START_TIMEOUT`` the workers are moved back to
    waiting and the task is not run. After the task the workers' stored
    messages are cleared and the conversation counters are updated.
    """
    shared_utils.print_and_log(
        logging.INFO,
        'Starting task {}...'.format(conversation_id)
    )
    shared_utils.print_and_log(
        logging.DEBUG,
        'Waiting for all workers to join the conversation...'
    )

    wait_started = time.time()
    # Keep polling while any worker assignment is not in the task yet
    while any(worker.state.status != AssignState.STATUS_IN_TASK
              for worker in workers):
        if time.time() - wait_started > WORLD_START_TIMEOUT:
            # We waited but not all workers rejoined, throw workers
            # back into the waiting pool. Stragglers will disconnect
            # from there
            shared_utils.print_and_log(
                logging.INFO,
                'Timeout waiting for {}, move back to waiting'.format(
                    conversation_id
                )
            )
            self._move_workers_to_waiting(workers)
            return
        time.sleep(shared_utils.THREAD_SHORT_SLEEP)

    shared_utils.print_and_log(
        logging.INFO,
        'All workers joined the conversation {}!'.format(conversation_id)
    )
    self.started_conversations += 1
    task_function(mturk_manager=self, opt=opt, workers=workers)

    # Delete extra state data that is now unneeded
    for worker in workers:
        worker.state.clear_messages()

    # Count if it's a completed conversation
    if self._no_workers_incomplete(workers):
        self.completed_conversations += 1
def _handle_bad_disconnect(self, worker_id):
    """Count a bad disconnect for a worker and block them past the limit.

    Does nothing in sandbox mode. Otherwise the worker's disconnect count
    is incremented, the event is appended to ``self.disconnects``, and the
    worker is blocked once the count exceeds ``MAX_DISCONNECTS``.
    """
    if self.is_sandbox:
        return

    self.mturk_workers[worker_id].disconnects += 1
    self.disconnects.append({'time': time.time(), 'id': worker_id})
    if self.mturk_workers[worker_id].disconnects <= MAX_DISCONNECTS:
        return

    block_reason = (
        'This worker has repeatedly disconnected from these tasks,'
        ' which require constant connection to complete properly '
        'as they involve interaction with other Turkers. They have'
        ' been blocked to ensure a better experience for other '
        'workers who don\'t disconnect.'
    )
    self.block_worker(worker_id, block_reason)
    shared_utils.print_and_log(
        logging.INFO,
        'Worker {} was blocked - too many disconnects'.format(worker_id),
        True
    )
def _send_packet(self, packet, connection_id, send_time):
    """Emit a packet through socketIO, then deal with its acknowledgement.

    packet: the packet to send
    connection_id: queue key used whenever the packet is put back in a queue
    send_time: queue time the packet was taken with; reused when the packet
        is re-queued after an ack timeout

    A blocking packet that requires an ack makes this call wait until the
    packet is acked or timed out. A non-blocking packet that requires an
    ack is put back in the queue to be checked later.
    """
    payload = packet.as_dict()
    shared_utils.print_and_log(
        logging.DEBUG,
        'Send packet: {}'.format(packet.data)
    )

    def mark_sent(data):
        # Callback handed to emit; flags the packet as sent when invoked
        packet.status = Packet.STATUS_SENT

    self.socketIO.emit(
        data_model.SOCKET_ROUTE_PACKET_STRING,
        payload,
        mark_sent
    )

    # Everything below is ack handling
    if not packet.requires_ack:
        return

    if not packet.blocking:
        # non-blocking ack: add ack-check to queue
        check_time = time.time() + self.ACK_TIME[packet.type]
        self._safe_put(connection_id, (check_time, packet))
        return

    # blocking till ack is received or timeout
    wait_started = time.time()
    while True:
        if packet.status == Packet.STATUS_ACK:
            # Clear the data to save memory as we no longer need it
            packet.data = None
            return
        if time.time() - wait_started > self.ACK_TIME[packet.type]:
            # didn't receive ACK, resend packet keep old queue time
            # to ensure this packet is processed first
            packet.status = Packet.STATUS_INIT
            self._safe_put(connection_id, (send_time, packet))
            return
        time.sleep(shared_utils.THREAD_SHORT_SLEEP)
def _check_hit_status(self):
    """Poll the manager for this agent's HIT until it is accepted, then
    until it is returned.

    Sets ``hit_is_accepted`` once the HIT reports one pending assignment,
    and ``hit_is_returned`` once it later reports one available assignment.
    A HIT returned while the worker is in a task is expired through the
    manager. The call only finishes after the HIT has been returned.
    """
    # TODO-1 replace with code that subscribes to notifs to update status

    def hit_field_is_one(field):
        # True when a HIT id is set and the polled HIT reports 1 for `field`
        if not self.hit_id:
            return False
        response = self.manager.get_hit(hit_id=self.hit_id)
        return response['HIT'][field] == 1

    # Check if HIT is accepted: Amazon MTurk system acknowledges that the
    # HIT is accepted
    while not hit_field_is_one('NumberOfAssignmentsPending'):
        time.sleep(shared_utils.THREAD_MTURK_POLLING_SLEEP)
    shared_utils.print_and_log(
        logging.INFO,
        'Worker has accepted the HIT'
    )
    self.hit_is_accepted = True

    # Wait for the HIT to be returned
    while not hit_field_is_one('NumberOfAssignmentsAvailable'):
        time.sleep(shared_utils.THREAD_MTURK_POLLING_SLEEP)
    self.hit_is_returned = True

    # If the worker is still in onboarding, then we don't need
    # to expire the HIT.
    # If the worker is already in a conversation, then we
    # should expire the HIT to keep the total number of
    # available HITs consistent with the number of
    # conversations left.
    if self.is_in_task():
        shared_utils.print_and_log(
            logging.INFO,
            'Worker {}_{} has returned the HIT {}. Since '
            'the worker is already in a task conversation, '
            'we are expiring the HIT.'.format(
                self.worker_id,
                self.assignment_id,
                self.hit_id
            )
        )
        self.manager.expire_hit(hit_id=self.hit_id)
    else:
        shared_utils.print_and_log(
            logging.INFO,
            'Worker {}_{} has returned the HIT {}. Since '
            'the worker is still in onboarding, we will not '
            'expire the HIT.'.format(
                self.worker_id,
                self.assignment_id,
                self.hit_id
            )
        )
    # we will not be using this MTurkAgent object for another
    # worker, so no need to check its status anymore