Example #1 (score: 0)
File: api_v1.py — Project: ekt68/texta
def api_mass_tagger(request, user, params):
    """Queue a mass-tagging preprocessor task and return a JSON reference to it (via auth_token)."""
    # Fill in defaults for optional parameters
    params.setdefault('search', 'all_docs')
    params.setdefault('description', "via API call")

    # Project the request parameters onto the preprocessor-task schema
    task_type = TaskTypes.APPLY_PREPROCESSOR
    params["preprocessor_key"] = "text_tagger"
    params["text_tagger_feature_names"] = params['field']

    # When no taggers are given, default to every completed trained tagger
    taggers = params.get('taggers', None)
    if taggers is None:
        completed = Task.objects.filter(task_type=TaskTypes.TRAIN_TAGGER.value).filter(status=Task.STATUS_COMPLETED)
        taggers = [tagger.id for tagger in completed]
    params['text_tagger_taggers'] = taggers

    description = params['description']

    # Create the execution task and place it on the queue
    task_id = create_task(task_type, description, params, user)
    task = Task.get_by_id(task_id)
    task.update_status(Task.STATUS_QUEUED)

    # Respond with a reference to the queued task
    payload = {
        'task_id': task_id,
        'task_type': task_type,
        'status': task.status,
        'user': task.user.username
    }
    return HttpResponse(json.dumps(payload), status=200, content_type='application/json')
Example #2 (score: 0)
File: views.py — Project: texta-tk/texta
def start_task(request):
    """Create a task from POST parameters; queue it, or mark it failed on an unknown type."""
    user = request.user
    task_type = request.POST['task_type']
    task_params = filter_params(request.POST)
    description = task_params['description']

    # Carry the active session dataset and language model into the task params
    session = request.session
    if 'dataset' in session.keys():
        task_params['dataset'] = session['dataset']
    if 'model' in session.keys():
        task_params['language_model'] = session['model']

    # Create the execution task, then queue it — or fail it when the type is invalid
    task_id = create_task(task_type, description, task_params, user)
    task = Task.get_by_id(task_id)
    if TaskTypes.hasValue(task_type):
        task.update_status(Task.STATUS_QUEUED)
    else:
        task.result = json.dumps({'error': '{} is not a proper Task Type value'.format(task_type)})
        task.update_status(Task.STATUS_FAILED, set_time_completed=True)

    return HttpResponse()
Example #3 (score: 0)
File: views.py — Project: ekt68/texta
def start_task(request):
    """Build a task from the POST payload and queue it, failing it if the type is unknown."""
    requested_type = request.POST['task_type']
    params = filter_params(request.POST)
    description = params['description']

    # Propagate active session selections into the task parameters
    for session_key, param_key in (('dataset', 'dataset'), ('model', 'language_model')):
        if session_key in request.session.keys():
            params[param_key] = request.session[session_key]

    # Create the execution task and fetch its record
    new_id = create_task(requested_type, description, params, request.user)
    created = Task.get_by_id(new_id)

    # Reject unknown task types after recording the attempt; otherwise queue it
    if not TaskTypes.hasValue(requested_type):
        created.result = json.dumps({'error': '{} is not a proper Task Type value'.format(requested_type)})
        created.update_status(Task.STATUS_FAILED, set_time_completed=True)
    else:
        created.update_status(Task.STATUS_QUEUED)

    return HttpResponse()
Example #4 (score: 0)
    def start_fact_deleter_task(self, rm_facts_dict, doc_id=None):
        """Start a fact_deleter management task that removes facts from documents.

        Arguments:
            rm_facts_dict {Dict[str: List[str]]} -- Fact values to remove, e.g.
                general format - { 'factname1': ['factvalue1','factvalue2', ...]}
                real example - {'CITY': ['tallinna', 'tallinn'], 'CAR': ['bmw', 'audi']}

        Keyword Arguments:
            doc_id {str} -- Restrict deletion to this single document if given (default: {None})
        """
        task_type = TaskTypes.MANAGEMENT_TASK
        description = 'fact_manager_fact_deletion'
        # Parameters handed over to the management task worker
        params = {
            'fact_deleter_fact_values': rm_facts_dict,
            'fact_deleter_doc_id': doc_id,
            'task_type': task_type,
            'manager_key': ManagerKeys.FACT_DELETER,
            'description': description,
            'dataset': self.request.session['dataset'],
        }
        # Create the task record, then move it onto the queue
        new_task_id = create_task(task_type, description, params, self.request.user)
        new_task = Task.objects.get(pk=new_task_id)
        new_task.update_status(Task.STATUS_QUEUED)
Example #5 (score: 0)
File: fact_manager.py — Project: ekt68/texta
    def start_fact_deleter_task(self, rm_facts_dict, doc_id=None):
        """Launch the fact_deleter management task to strip facts from documents.

        Arguments:
            rm_facts_dict {Dict[str: List[str]]} -- Maps fact names to the values to delete,
                e.g. {'CITY': ['tallinna', 'tallinn'], 'CAR': ['bmw', 'audi']}

        Keyword Arguments:
            doc_id {str} -- When present, only that document is affected (default: {None})
        """
        task_type = TaskTypes.MANAGEMENT_TASK
        description = 'fact_manager_fact_deletion'
        # Payload consumed by the fact_deleter worker
        deletion_params = {
            'fact_deleter_fact_values': rm_facts_dict,
            'fact_deleter_doc_id': doc_id,
            'task_type': task_type,
            'manager_key': ManagerKeys.FACT_DELETER,
            'description': description,
            'dataset': self.request.session['dataset'],
        }
        # Create and immediately queue the task
        queued = Task.objects.get(pk=create_task(task_type, description, deletion_params, self.request.user))
        queued.update_status(Task.STATUS_QUEUED)
Example #6 (score: 0)
File: api_v1.py — Project: ekt68/texta
def api_train_tagger(request, user, params):
    """Queue a tagger-training task and return a JSON reference to it."""
    task_type = TaskTypes.TRAIN_TAGGER
    description = params['description']
    # Create the execution task and put it on the queue
    task_id = create_task(task_type, description, params, user)
    task = Task.get_by_id(task_id)
    task.update_status(Task.STATUS_QUEUED)
    # Respond with a reference to the queued task
    payload = {
        'task_id': task_id,
        'task_type': task_type,
        'status': task.status,
        'user': task.user.username
    }
    return HttpResponse(json.dumps(payload), status=200, content_type='application/json')
Example #7 (score: 0)
File: api_v1.py — Project: texta-tk/texta
def api_train_model(request, user, params):
    """Create and queue a model-training task, returning its reference as JSON."""
    task_type = TaskTypes.TRAIN_MODEL
    description = params['description']
    # Create the execution task
    new_task_id = create_task(task_type, description, params, user)
    # Fetch the record and add it to the queue
    new_task = Task.get_by_id(new_task_id)
    new_task.update_status(Task.STATUS_QUEUED)
    # Build and return the task reference
    reference = json.dumps({
        'task_id': new_task_id,
        'task_type': task_type,
        'status': new_task.status,
        'user': new_task.user.username
    })
    return HttpResponse(reference, status=200, content_type='application/json')
Example #8 (score: 0)
File: fact_manager.py — Project: ekt68/texta
    def start_fact_adder_task(self, fact_name: str, fact_value: str, fact_field: str, doc_id: str, method: str, match_type: str, case_sens: bool):
        """Start a fact_adder management task that adds a custom fact to documents."""
        task_type = TaskTypes.MANAGEMENT_TASK.value
        description = 'fact_manager_fact_adding'
        # Payload consumed by the fact_adder worker
        params = {
            'fact_name': fact_name,
            'fact_value': fact_value,
            'fact_field': fact_field,
            'doc_id': doc_id,
            'method': method,
            'match_type': match_type,
            'case_sens': case_sens,
            'task_type': task_type,
            'manager_key': ManagerKeys.FACT_ADDER,
            'description': description,
            'dataset': self.request.session['dataset'],
        }
        # Create the task record, then move it onto the queue
        new_task = Task.objects.get(pk=create_task(task_type, description, params, self.request.user))
        new_task.update_status(Task.STATUS_QUEUED)
Example #9 (score: 0)
File: api_v1.py — Project: texta-tk/texta
def api_mass_tagger(request, user, params):
    """Apply the mass tagger via auth_token: queue a text_tagger preprocessor task."""
    # Default values for the optional parameters
    if 'search' not in params:
        params['search'] = 'all_docs'
    if 'description' not in params:
        params['description'] = "via API call"

    # Map the request onto the preprocessor-task parameter names
    task_type = TaskTypes.APPLY_PREPROCESSOR
    params["preprocessor_key"] = "text_tagger"
    params["text_tagger_feature_names"] = params['field']

    # Resolve the tagger selection; fall back to all completed trained taggers
    selected_taggers = params.get('taggers', None)
    if selected_taggers is None:
        finished = Task.objects.filter(task_type=TaskTypes.TRAIN_TAGGER.value).filter(status=Task.STATUS_COMPLETED)
        selected_taggers = [entry.id for entry in finished]
    params['text_tagger_taggers'] = selected_taggers

    job_description = params['description']

    # Create the execution task and queue it
    job_id = create_task(task_type, job_description, params, user)
    job = Task.get_by_id(job_id)
    job.update_status(Task.STATUS_QUEUED)

    # Return the task reference as JSON
    reference = {
        'task_id': job_id,
        'task_type': task_type,
        'status': job.status,
        'user': job.user.username
    }
    return HttpResponse(json.dumps(reference), status=200, content_type='application/json')
Example #10 (score: 0)
    def start_fact_adder_task(self, fact_name: str, fact_value: str,
                              fact_field: str, doc_id: str, method: str,
                              match_type: str, case_sens: bool):
        """Launch the fact_adder management task to attach a custom fact to documents."""
        task_type = TaskTypes.MANAGEMENT_TASK.value
        description = 'fact_manager_fact_adding'
        # Everything the fact_adder worker needs to run
        adder_params = {
            'fact_name': fact_name,
            'fact_value': fact_value,
            'fact_field': fact_field,
            'doc_id': doc_id,
            'method': method,
            'match_type': match_type,
            'case_sens': case_sens,
            'task_type': task_type,
            'manager_key': ManagerKeys.FACT_ADDER,
            'description': description,
            'dataset': self.request.session['dataset'],
        }
        # Create and immediately queue the task
        created_id = create_task(task_type, description, adder_params, self.request.user)
        Task.objects.get(pk=created_id).update_status(Task.STATUS_QUEUED)
Example #11 (score: 0)
File: api_v1.py — Project: ekt68/texta
def api_hybrid_tagger(request, user, params):
    """Apply the hybrid tagger (via auth_token).

    Finds documents similar to the given search, counts TEXTA_TAG facts on the
    chosen field, selects taggers whose descriptions match the frequent tags,
    and queues a text_tagger preprocessor task with that selection.
    """
    DEFAULT_TAGS_THRESHOLD = 50
    DEFAULT_MAX_TAGGERS = 20

    dataset_id = params['dataset']
    search = params['search']
    field = params['field']
    max_taggers = int(params.get('max_taggers', DEFAULT_MAX_TAGGERS))
    min_count_threshold = int(params.get('min_count_threshold', DEFAULT_TAGS_THRESHOLD))

    if 'description' not in params:
        params['description'] = "via API call"
    # Map the request onto the preprocessor-task parameter names
    task_type = TaskTypes.APPLY_PREPROCESSOR
    params["preprocessor_key"] = "text_tagger"
    params["text_tagger_feature_names"] = params['field']

    # Validate the dataset id by attempting to activate it
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    if not ds.is_active():
        return HttpResponse(json.dumps({'error': 'invalid dataset parameter'}),
                            status=400, content_type='application/json')

    param_query = json.loads(Search.objects.get(pk=int(search)).query)
    es_m = ds.build_manager(ES_Manager)
    es_m.load_combined_query(param_query)
    # Pull similar documents from a neighborhood of size 1000
    response = es_m.more_like_this_search([field], search_size=1000)
    docs = response['hits']['hits']

    # Count TEXTA_TAG facts on the chosen field across the similar documents
    tag_freq = {}
    for doc in docs:
        for fact in doc['_source'].get('texta_facts', []):
            if fact['fact'] == 'TEXTA_TAG' and fact['doc_path'] == field:
                tag_value = fact['str_val']
                tag_freq[tag_value] = tag_freq.get(tag_value, 0) + 1

    # Keep only the max_taggers most frequent tags
    ranked = sorted(tag_freq.items(), key=lambda item: item[1], reverse=True)
    top_tags = set(tag for tag, _ in ranked[:max_taggers])

    # Select tags that are both frequent enough and inside the top set
    data = {'task': {}, 'explain': []}
    candidate_tags = set()
    for tag, count in tag_freq.items():
        selected = 1 if (count >= min_count_threshold and tag in top_tags) else 0
        if selected:
            candidate_tags.add(tag)
        data['explain'].append({'tag': tag,
                                'selected': selected,
                                'count': count})

    # Keep completed taggers whose description matches a selected tag
    tagger_search = Task.objects.filter(task_type=TaskTypes.TRAIN_TAGGER.value).filter(status=Task.STATUS_COMPLETED)
    taggers = [tagger.id for tagger in tagger_search if tagger.description in candidate_tags]

    if len(taggers) > 0:
        description = params['description']
        params['text_tagger_taggers'] = taggers
        # Create the execution task and queue it
        task_id = create_task(task_type, description, params, user)
        task = Task.get_by_id(task_id)
        task.update_status(Task.STATUS_QUEUED)
        # Attach the task reference to the response
        data['task'] = {
            'task_id': task_id,
            'task_type': task_type,
            'status': task.status,
            'user': task.user.username
        }
    else:
        # No taggers cleared the selection criteria
        data['task'] = {"error": "no similar documents have tags count above threshold"}

    # Echo the effective thresholds back to the caller
    data['min_count_threshold'] = min_count_threshold
    data['max_taggers'] = max_taggers
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')
Example #12 (score: 0)
File: api_v1.py — Project: texta-tk/texta
def api_hybrid_tagger(request, user, params):
    """Apply the hybrid tagger (via auth_token).

    Counts tag facts over documents similar to the given search, picks the
    taggers corresponding to the frequent tags, and queues a text_tagger
    preprocessor task over that tagger selection.
    """
    DEFAULT_TAGS_THRESHOLD = 50
    DEFAULT_MAX_TAGGERS = 20

    dataset_id = params['dataset']
    search = params['search']
    field = params['field']
    max_taggers = int(params.get('max_taggers', DEFAULT_MAX_TAGGERS))
    min_count_threshold = int(params.get('min_count_threshold', DEFAULT_TAGS_THRESHOLD))

    if 'description' not in params:
        params['description'] = "via API call"
    # Projection onto the preprocessor-task parameter schema
    task_type = TaskTypes.APPLY_PREPROCESSOR
    params["preprocessor_key"] = "text_tagger"
    params["text_tagger_feature_names"] = params['field']

    # Reject invalid dataset ids up front
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    if not ds.is_active():
        error_json = json.dumps({'error': 'invalid dataset parameter'})
        return HttpResponse(error_json, status=400, content_type='application/json')

    # Run a more-like-this query against the stored search (neighborhood of 1000)
    param_query = json.loads(Search.objects.get(pk=int(search)).query)
    es_m = ds.build_manager(ES_Manager)
    es_m.load_combined_query(param_query)
    similar = es_m.more_like_this_search([field], search_size=1000)

    # Tally TEXTA_TAG facts on the requested field
    counts = {}
    for hit in similar['hits']['hits']:
        for fact in hit['_source'].get('texta_facts', []):
            if fact['fact'] == 'TEXTA_TAG' and fact['doc_path'] == field:
                value = fact['str_val']
                counts.setdefault(value, 0)
                counts[value] += 1

    # Limit the candidate pool to the most frequent tags
    by_frequency = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    top_tags = {pair[0] for pair in by_frequency[:max_taggers]}

    # Mark each tag as selected when it is frequent enough and in the top set
    data = {'task': {}, 'explain': []}
    candidate_tags = set()
    for tag, count in counts.items():
        if count >= min_count_threshold and tag in top_tags:
            selected = 1
            candidate_tags.add(tag)
        else:
            selected = 0
        data['explain'].append({
            'tag': tag,
            'selected': selected,
            'count': count
        })

    # Match completed taggers against the selected tags by description
    completed = Task.objects.filter(task_type=TaskTypes.TRAIN_TAGGER.value).filter(status=Task.STATUS_COMPLETED)
    taggers = [entry.id for entry in completed if entry.description in candidate_tags]

    if len(taggers) > 0:
        params['text_tagger_taggers'] = taggers
        # Create and queue the execution task
        task_id = create_task(task_type, params['description'], params, user)
        task = Task.get_by_id(task_id)
        task.update_status(Task.STATUS_QUEUED)
        data['task'] = {
            'task_id': task_id,
            'task_type': task_type,
            'status': task.status,
            'user': task.user.username
        }
    else:
        # No tagger passed the frequency criteria
        data['task'] = {
            "error": "no similar documents have tags count above threshold"
        }

    # Report the effective thresholds alongside the result
    data['min_count_threshold'] = min_count_threshold
    data['max_taggers'] = max_taggers
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')