def _create_or_get_worker(external_worker_id, source=None):
    """Return the Worker for ``external_worker_id``, creating it if absent.

    The first ``Worker`` row whose ``external_worker_id`` matches is
    returned as-is. When no row matches, a new ``Worker`` is built with the
    given ``source``, added to the session and flushed (not committed).
    ``source`` is only used when a new row is created.
    """
    lookup = db_session.query(Worker).filter_by(
        external_worker_id=external_worker_id)
    existing = lookup.first()
    if existing:
        return existing

    created = Worker(external_worker_id=external_worker_id, source=source)
    db_session.add(created)
    db_session.flush()
    return created
def create_jobs(job_type, amount=1):
    """Create jobs of ``job_type`` from eligible nodes and commit them.

    Candidate nodes are active, have a positive score and are visited in
    descending score order. For each candidate, the last
    ``MAX_DIALOGUE_HISTORY`` ids of its ``path`` are loaded as the dialogue
    history. One eligible node utterance is picked at random per history
    node, and a ``Job`` is only created when every history node contributed
    one.

    Args:
        job_type: ``'user'``, ``'system'`` or ``SPECIES_TAG``.
        amount: The loop stops as soon as this many jobs have been created.

    Returns:
        The list of ``Job`` objects created by this call.

    Raises:
        ValueError: If ``job_type`` is not one of the accepted values.
    """
    valid_job_types = ['user', 'system', SPECIES_TAG]
    if job_type not in valid_job_types:
        raise ValueError(
            f'work type: "{job_type}" does not exist. '
            f'Use one of: {valid_job_types}')

    if job_type == SPECIES_TAG:
        job_filter = [Node.species == SPECIES_TAG, Node.active_child_count < 3]
    else:
        job_filter = [Node.active_child_count == 0, Node.visited_count > 1]

    nodes = db_session.query(Node) \
        .filter(
            Node.score > 0,
            # 'user' jobs are built on nodes with is_user False, every other
            # job type on nodes with is_user True.
            Node.is_user == (job_type != 'user'),
            Node.active.is_(True),
            *job_filter
        )\
        .order_by(Node.score.desc()) \
        .all()

    created_jobs = []
    for node in nodes:
        history_ids = node.path[-MAX_DIALOGUE_HISTORY:]
        history = db_session\
            .query(Node)\
            .filter(Node.id.in_(history_ids), Node.active.is_(True))\
            .options(joinedload(Node.utterances), joinedload(Node.node_utterances))\
            .order_by(Node.path_length.asc())\
            .all()
        history_length = len(history)
        if len(history_ids) != history_length:
            # Some node on the path was not returned by the query above
            # (which only selects active nodes); skip this candidate.
            logger.warning(f'history_ids != history, {history_ids} != {history}')
            continue

        job_node_utterances = []
        for index, history_node in enumerate(history):
            is_last = index == history_length - 1
            pool_of_node_utterances = [
                node_utterance
                for node_utterance in history_node.node_utterances
                if _check_node_utterance_eligibility(node_utterance, is_last, job_type)
            ]
            if pool_of_node_utterances:
                job_node_utterances.append(random.choice(pool_of_node_utterances))

        # Only build a job when every history node supplied an utterance.
        if len(history_ids) == len(job_node_utterances):
            job = Job(job_type=job_type, persona_sample=get_persona_sample())
            db_session.add(job)
            # Flush so job.id is populated for the link rows below.
            db_session.flush()
            for position, node_utterance in enumerate(job_node_utterances):
                db_session.add(JobNodeUtterance(
                    job_id=job.id,
                    node_utterance_id=node_utterance.id,
                    position=position))

            created_jobs.append(job)

        if len(created_jobs) == amount:
            break

    db_session.commit()
    print(f'created {len(created_jobs)} jobs')
    return created_jobs
def get_synonym_objects(synonym_path):
    """Return an ``Utterance`` for every non-blank line of ``synonym_path``.

    Each line is stripped of surrounding whitespace and looked up by
    ``utterance_text``. A missing utterance is created, added to the session
    and flushed (not committed). Lines that are empty after stripping are
    skipped so that no empty-text utterance is created.

    Args:
        synonym_path: Path of a text file with one synonym per line.

    Returns:
        List of ``Utterance`` objects in file order.
    """
    synonym_objects = []
    with open(synonym_path, "r") as f:
        for line in f:
            text = line.strip()
            if not text:
                continue
            utterance = (db_session.query(Utterance).filter_by(
                utterance_text=text).first())
            if not utterance:
                utterance = Utterance(utterance_text=text)
                db_session.add(utterance)
                db_session.flush()
            synonym_objects.append(utterance)
    return synonym_objects
def delete_node(node_id):
    """Delete a node, its descendants and the rows referencing its utterances.

    Children are deleted first, recursively. For each of the node's node
    utterances, the ``JobNodeUtterance``, ``NodeUtteranceStatus`` (both via
    ``node_utterance_id`` and ``referenced_node_utterance_id``) and
    ``NodeUtteranceWorkerJob`` rows pointing at it are deleted, then the
    node utterance itself, and finally the node. Every level of the
    recursion commits, so the deletion is not a single transaction.

    Args:
        node_id: Primary key of the node to delete.

    Raises:
        ValueError: If no node with ``node_id`` exists.
    """
    node = db_session.query(Node).get(node_id)
    if node is None:
        raise ValueError(f"cannot delete node {node_id}: it does not exist")

    # Iterate over snapshots: the loops below delete and commit, so the
    # relationship collections must not be relied on while walking them.
    for child in list(node.children):
        delete_node(child.id)

    # (model, column) pairs whose rows reference a node utterance.
    referencing_columns = (
        (JobNodeUtterance, JobNodeUtterance.node_utterance_id),
        (NodeUtteranceStatus, NodeUtteranceStatus.node_utterance_id),
        (NodeUtteranceStatus,
         NodeUtteranceStatus.referenced_node_utterance_id),
        (NodeUtteranceWorkerJob, NodeUtteranceWorkerJob.node_utterance_id),
    )
    for node_utterance in list(node.node_utterances):
        for model, column in referencing_columns:
            db_session.query(model).filter(
                column == node_utterance.id).delete()
        db_session.flush()
        db_session.delete(node_utterance)

    # The node utterance deletions are committed before the node is deleted.
    db_session.commit()
    db_session.delete(node)
    db_session.commit()
def create_new_node(utterances,
                    source="manual",
                    parent_id=None,
                    commit=False,
                    species=None):
    """Create a node below an optional parent and attach utterances to it.

    Args:
        utterances: A single string, or an iterable of items accepted by
            ``add_utterance_to_node`` (text or ``Utterance`` objects).
        source: Stored as ``source`` on every created node/utterance link.
        parent_id: Either a ``Node`` instance, a node id to look up, or
            ``None`` for a node without a parent. An id that matches no row
            also results in a node without a parent.
        commit: When true, the session is committed before returning;
            otherwise changes are only flushed.
        species: Passed through to the ``Node`` constructor.

    Returns:
        The newly created ``Node``.
    """
    if isinstance(parent_id, Node):
        parent = parent_id
    elif parent_id is not None:
        parent = db_session.query(Node).get(parent_id)
    else:
        parent = None

    node = Node(parent=parent, species=species)
    db_session.add(node)
    # Flush so node.id is assigned before it is appended to the path.
    db_session.flush()
    node.path = (parent.path if parent else []) + [node.id]

    # A bare string is one utterance, not an iterable of characters.
    if isinstance(utterances, str):
        utterances = [utterances]
    for utterance in utterances:
        add_utterance_to_node(utterance, node, source)

    if commit:
        db_session.commit()
    return node
def add_utterance_to_node(utterance_text, node, source):
    """Attach an utterance to ``node`` and record ``source`` on the link.

    ``utterance_text`` may be an ``Utterance`` instance, which is used
    directly, or any other value, which is looked up by ``utterance_text``
    and created (added and flushed) when no row matches. The utterance is
    appended to ``node.utterances`` and the session flushed; the
    ``NodeUtterance`` row for this node/utterance pair is then fetched,
    given ``source``, flushed again and returned.
    """
    if isinstance(utterance_text, Utterance):
        utterance = utterance_text
    else:
        by_text = db_session.query(Utterance).filter_by(
            utterance_text=utterance_text)
        utterance = by_text.first()
        if not utterance:
            utterance = Utterance(utterance_text=utterance_text)
            db_session.add(utterance)
            db_session.flush()

    node.utterances.append(utterance)
    db_session.flush()

    link = db_session.query(NodeUtterance).filter_by(
        node_id=node.id, utterance_id=utterance.id).first()
    link.source = source
    db_session.flush()
    return link
def _purge_empty_utterance(utterance, node, purged_utterance_ids,
                           purged_node_ids):
    """Delete ``utterance`` if its text is ``""`` or ``" "``.

    Returns True when the utterance is empty (whether it was deleted now or
    on an earlier call) and False otherwise. Ids of deleted utterances and
    nodes are recorded in the two sets so each object is deleted only once.

    Raises:
        Exception: If the utterance is empty but ``node`` has children.
    """
    if utterance.utterance_text not in ("", " "):
        return False

    utterance_id = utterance.id
    if utterance_id in purged_utterance_ids:
        # Already deleted during an earlier pairing; do not delete it again.
        return True

    print("removing empty utterance", utterance.utterance_text, utterance_id)
    if node.children:
        raise Exception("empty string has children. WAT?! :S")
    db_session.delete(utterance)
    db_session.flush()
    purged_utterance_ids.add(utterance_id)

    # NOTE(review): node.utterances may still hold the deleted utterance
    # until the node is refreshed, in which case this branch is not taken --
    # confirm against the relationship configuration.
    if not node.utterances and node.id not in purged_node_ids:
        print("removing node", node.id)
        db_session.delete(node)
        purged_node_ids.add(node.id)
    return True


def exact_match():
    """Merge active sibling nodes that share a case-insensitive utterance.

    Active nodes are grouped by ``parent_id``. Within each group, every pair
    of nodes that is not already recorded in ``Merging`` (in either order)
    is compared utterance by utterance. Empty utterances (``""`` or
    ``" "``) found along the way are deleted; otherwise, when the lowercased
    texts are equal, ``merge_nodes`` is called for the pair. The session is
    committed once at the end.

    Raises:
        Exception: If a node holding an empty utterance has children.
    """
    # "<left>--<right>" keys of pairs that must not be compared again; a set
    # keeps the membership tests in the nested loops cheap.
    seen_pairs = {
        f"{merge.left_node_id}--{merge.right_node_id}"
        for merge in db_session.query(Merging).all()
    }

    nodes = (db_session.query(Node).filter(Node.active.is_(True)).order_by(
        Node.parent_id.desc()).all())
    siblings_by_parent = defaultdict(list)
    for node in nodes:
        siblings_by_parent[node.parent_id].append(node)

    purged_utterance_ids = set()
    purged_node_ids = set()

    bar = progressbar.ProgressBar()
    for _parent_id, siblings in bar(siblings_by_parent.items()):
        for i, left_node in enumerate(siblings):
            for j, right_node in enumerate(siblings):
                if i == j:
                    continue
                pair_key = f"{left_node.id}--{right_node.id}"
                reverse_key = f"{right_node.id}--{left_node.id}"
                if pair_key in seen_pairs or reverse_key in seen_pairs:
                    continue
                seen_pairs.add(pair_key)

                for left_utterance in left_node.utterances:
                    for right_utterance in right_node.utterances:
                        # Both sides are checked, left first, even when the
                        # left one turned out to be empty.
                        left_empty = _purge_empty_utterance(
                            left_utterance, left_node,
                            purged_utterance_ids, purged_node_ids)
                        right_empty = _purge_empty_utterance(
                            right_utterance, right_node,
                            purged_utterance_ids, purged_node_ids)
                        if left_empty or right_empty:
                            continue

                        if (left_utterance.utterance_text.lower()
                                == right_utterance.utterance_text.lower()):
                            merge_nodes(left_node.id, right_node.id, True)

    db_session.commit()