def _create_or_get_worker(external_worker_id, source=None):
    """Return the Worker with the given external id, creating it if absent.

    The first row matching ``external_worker_id`` is returned unchanged.
    When there is none, a new Worker is built with ``external_worker_id``
    and ``source``, added to the session and flushed (not committed).
    ``source`` is only used when a new Worker is created.
    """
    lookup = db_session.query(Worker).filter_by(
        external_worker_id=external_worker_id)
    existing = lookup.first()
    if existing:
        return existing

    created = Worker(external_worker_id=external_worker_id, source=source)
    db_session.add(created)
    db_session.flush()
    return created
def create_jobs(job_type, amount=1):
    """Create jobs of ``job_type`` from the best-scoring eligible nodes.

    Candidate nodes are active, have a positive score, and are visited in
    descending score order. For every candidate the last
    ``MAX_DIALOGUE_HISTORY`` entries of its path are loaded as dialogue
    history; one eligible node utterance is picked at random per history
    node, and a job is only created when every history node contributed one.

    Args:
        job_type: ``'user'``, ``'system'`` or ``SPECIES_TAG``.
        amount: Stop after this many jobs have been created. The limit is
            only checked right after a job is created, so a value below 1
            never triggers it and every candidate node is processed.

    Returns:
        The list of created ``Job`` objects (already committed).

    Raises:
        ValueError: If ``job_type`` is not one of the accepted values.
    """
    valid_job_types = ['user', 'system', SPECIES_TAG]
    if job_type not in valid_job_types:
        raise ValueError(
            f'work type: "{job_type}" does not exist. '
            f'Use one of: {", ".join(map(str, valid_job_types))}'
        )

    if job_type == SPECIES_TAG:
        job_filter = [Node.species == SPECIES_TAG, Node.active_child_count < 3]
    else:
        job_filter = [Node.active_child_count == 0, Node.visited_count > 1]

    nodes = db_session.query(Node) \
        .filter(
            Node.score > 0,
            Node.is_user == (job_type != 'user'),
            Node.active.is_(True),
            *job_filter
        ) \
        .order_by(Node.score.desc()) \
        .all()

    created_jobs = []
    for node in nodes:
        history_ids = node.path[-MAX_DIALOGUE_HISTORY:]
        history = db_session \
            .query(Node) \
            .filter(Node.id.in_(history_ids), Node.active.is_(True)) \
            .options(joinedload(Node.utterances),
                     joinedload(Node.node_utterances)) \
            .order_by(Node.path_length.asc()) \
            .all()
        history_length = len(history)

        # Some node on the path is missing or inactive: skip this candidate.
        if len(history_ids) != history_length:
            logger.warning(f'history_ids != history, {history_ids} != {history}')
            continue

        job_node_utterances = []
        for index, history_node in enumerate(history):
            is_last = index == history_length - 1
            pool_of_node_utterances = [
                node_utterance
                for node_utterance in history_node.node_utterances
                if _check_node_utterance_eligibility(
                    node_utterance, is_last, job_type)
            ]
            if pool_of_node_utterances:
                job_node_utterances.append(
                    random.choice(pool_of_node_utterances))

        # Only build a job when every history node supplied an utterance.
        if len(history_ids) == len(job_node_utterances):
            job = Job(job_type=job_type, persona_sample=get_persona_sample())
            db_session.add(job)
            db_session.flush()  # populate job.id for the link rows below
            for i, node_utterance in enumerate(job_node_utterances):
                db_session.add(JobNodeUtterance(
                    job_id=job.id,
                    node_utterance_id=node_utterance.id,
                    position=i))
            created_jobs.append(job)
            if len(created_jobs) == amount:
                break

    db_session.commit()
    print(f'created {len(created_jobs)} jobs')
    return created_jobs
def get_synonym_objects(synonym_path):
    """Load one synonym per line from a file and return matching Utterances.

    Each non-blank line is stripped of surrounding whitespace and looked up
    by ``utterance_text``; when no row exists a new Utterance is added to the
    session and flushed (not committed).

    Args:
        synonym_path: Path of a text file with one synonym per line.

    Returns:
        A list of Utterance objects in file order, one per non-blank line.
    """
    synonym_objects = []
    with open(synonym_path, "r") as f:
        for line in f:
            synonym_text = line.strip()
            # A blank line would otherwise be stored as an empty-text
            # Utterance.
            if not synonym_text:
                continue
            utterance = db_session.query(Utterance).filter_by(
                utterance_text=synonym_text).first()
            if not utterance:
                utterance = Utterance(utterance_text=synonym_text)
                db_session.add(utterance)
                db_session.flush()
            synonym_objects.append(utterance)
    return synonym_objects
def delete_node(node_id):
    """Delete a node, its whole subtree and every row that references them.

    Children are deleted first (recursively). For each of the node's node
    utterances the referencing JobNodeUtterance, NodeUtteranceStatus (by
    either reference column) and NodeUtteranceWorkerJob rows are deleted
    before the node utterance itself. Work is committed after every node
    utterance and after the node, so a failure part-way leaves the earlier
    deletions in place.

    Args:
        node_id: Primary key of the node to delete.

    Raises:
        ValueError: If no node with ``node_id`` exists.
    """
    node = db_session.query(Node).get(node_id)
    if node is None:
        raise ValueError(f"node {node_id} does not exist")

    # Iterate over snapshots: rows are deleted and committed inside the
    # loops, so the live relationship collections should not be relied on.
    for child in list(node.children):
        delete_node(child.id)

    for node_utterance in list(node.node_utterances):
        node_utterance_id = node_utterance.id
        db_session.query(JobNodeUtterance).filter(
            JobNodeUtterance.node_utterance_id == node_utterance_id).delete()
        db_session.query(NodeUtteranceStatus).filter(
            NodeUtteranceStatus.node_utterance_id == node_utterance_id).delete()
        db_session.query(NodeUtteranceStatus).filter(
            NodeUtteranceStatus.referenced_node_utterance_id
            == node_utterance_id).delete()
        db_session.query(NodeUtteranceWorkerJob).filter(
            NodeUtteranceWorkerJob.node_utterance_id
            == node_utterance_id).delete()
        db_session.flush()
        db_session.delete(node_utterance)
        db_session.commit()

    db_session.delete(node)
    db_session.commit()
def create_new_node(utterances, source="manual", parent_id=None, commit=False,
                    species=None):
    """Create a node under an optional parent and attach utterances to it.

    Args:
        utterances: A single string, or an iterable of items accepted by
            ``add_utterance_to_node``.
        source: Stored on every node/utterance link that is created.
        parent_id: ``None`` for a root node, a ``Node`` instance, or the
            primary key of the parent node.
        commit: Commit the session before returning when true; otherwise the
            changes are only flushed.
        species: Passed through to the ``Node`` constructor.

    Returns:
        The newly created ``Node``; its ``path`` is the parent's path (or an
        empty list) followed by its own id.

    Raises:
        ValueError: If ``parent_id`` is a key that matches no node.
    """
    if isinstance(parent_id, Node):
        parent = parent_id
    elif parent_id is not None:
        parent = db_session.query(Node).get(parent_id)
        if parent is None:
            # Without this check the node would silently become a root.
            raise ValueError(f"parent node {parent_id} does not exist")
    else:
        parent = None

    node = Node(parent=parent, species=species)
    db_session.add(node)
    db_session.flush()  # assigns node.id, needed for the path below
    node.path = (parent.path if parent is not None else []) + [node.id]

    if isinstance(utterances, str):
        utterances = [utterances]
    for utterance in utterances:
        add_utterance_to_node(utterance, node, source)

    if commit:
        db_session.commit()
    return node
def add_utterance_to_node(utterance_text, node, source):
    """Attach an utterance to ``node`` and tag the link with ``source``.

    ``utterance_text`` is either an ``Utterance`` instance, used as is, or a
    text value: the first Utterance with that text is reused, otherwise a
    new one is added and flushed. The utterance is appended to
    ``node.utterances``, the session is flushed, and the NodeUtterance row
    joining the two is looked up, given ``source``, flushed and returned.
    """
    if isinstance(utterance_text, Utterance):
        utterance = utterance_text
    else:
        by_text = db_session.query(Utterance).filter_by(
            utterance_text=utterance_text)
        utterance = by_text.first()
        if not utterance:
            utterance = Utterance(utterance_text=utterance_text)
            db_session.add(utterance)
            db_session.flush()

    node.utterances.append(utterance)
    db_session.flush()

    link = db_session.query(NodeUtterance).filter_by(
        node_id=node.id, utterance_id=utterance.id).first()
    link.source = source
    db_session.flush()
    return link
def _remove_empty_utterance(node, utterance):
    """Delete ``utterance`` when its text is ``""`` or ``" "``.

    Returns True when the utterance was removed (now or earlier in this
    pass) and must not be compared, False when it is a regular utterance.
    Raises ``Exception`` if the owning node still has children.
    """
    # An utterance deleted and flushed earlier is no longer part of the
    # session; do not touch it again.
    if utterance not in db_session:
        return True
    if utterance.utterance_text not in ("", " "):
        return False

    print("removing empty utterance", utterance.utterance_text, utterance.id)
    if node.children:
        raise Exception("empty string has children. WAT?! :S")
    db_session.delete(utterance)
    db_session.flush()
    # NOTE(review): this reads the in-memory collection; confirm it reflects
    # the deletion above (it may need expiring/refreshing first).
    if not node.utterances:
        print("removing node", node.id)
        db_session.delete(node)
    return True


def exact_match():
    """Merge sibling nodes that share a case-insensitively equal utterance.

    Active nodes are grouped by ``parent_id``. Every unordered pair inside a
    group that is not already recorded in ``Merging`` (in either direction)
    is examined once: each utterance of the first node is compared with each
    utterance of the second, and ``merge_nodes(left.id, right.id, True)`` is
    called for every pair whose lower-cased texts are equal. Empty utterances
    met on the way are deleted instead of compared. The session is committed
    at the end.

    Raises:
        Exception: If a node holding an empty utterance has children.
    """
    # Keys use the same "<left>--<right>" form for stored merges and for
    # pairs handled in this run; a set keeps the membership test O(1).
    seen_pairs = {
        f"{merge.left_node_id}--{merge.right_node_id}"
        for merge in db_session.query(Merging).all()
    }

    nodes = (db_session.query(Node)
             .filter(Node.active.is_(True))
             .order_by(Node.parent_id.desc())
             .all())
    nodes_by_parent = defaultdict(list)
    for node in nodes:
        nodes_by_parent[node.parent_id].append(node)

    bar = progressbar.ProgressBar()
    for siblings in bar(nodes_by_parent.values()):
        for i, left_node in enumerate(siblings):
            # Only look at later siblings: each unordered pair is visited
            # once, with the earlier node on the left.
            for right_node in siblings[i + 1:]:
                pair_key = f"{left_node.id}--{right_node.id}"
                reverse_key = f"{right_node.id}--{left_node.id}"
                if pair_key in seen_pairs or reverse_key in seen_pairs:
                    continue
                seen_pairs.add(pair_key)

                for left_utterance in left_node.utterances:
                    for right_utterance in right_node.utterances:
                        # Both sides are always checked so that an empty
                        # utterance on either node gets cleaned up.
                        left_removed = _remove_empty_utterance(
                            left_node, left_utterance)
                        right_removed = _remove_empty_utterance(
                            right_node, right_utterance)
                        if left_removed or right_removed:
                            continue
                        if (left_utterance.utterance_text.lower()
                                == right_utterance.utterance_text.lower()):
                            merge_nodes(left_node.id, right_node.id, True)
    db_session.commit()