Esempio n. 1
0
 def _to_hit_group_class(results):
     for result in results:
         doc = result["document"]
         prob = result["probabilities"]
         yield HitGroupClass(group_id=doc["group_id"],
                             classes=NaiveBayesClassifier.most_likely(result),
                             probabilities=json.dumps(prob))
Esempio n. 2
0
def hit_group_details(request, hit_group_id):

    try:
        hit_group = HitGroupContent.objects.get(group_id=hit_group_id)
        if RequesterProfile.objects.filter(requester_id=hit_group.requester_id,
            is_public=False):
            raise HitGroupContent.DoesNotExist()
    except HitGroupContent.DoesNotExist:
        messages.info(request, 'Hitgroup with id "{0}" was not found!'.format(
            hit_group_id))
        return redirect('haystack_search')

    try:
        hit_group_class = HitGroupClass.objects.get(group_id=hit_group_id)
    except ObjectDoesNotExist:
        # TODO classification should be done on all models.
        hit_group_class = None
        try:
            with open(settings.CLASSIFIER_PATH, "r") as file:
                classifier = NaiveBayesClassifier(probabilities=json.load(file))
                classified = classifier.classify(hit_group)
                most_likely = classifier.most_likely(classified)
                document = classified["document"]
                hit_group_class = HitGroupClass(
                        group_id=document.group_id,
                        classes=most_likely,
                        probabilities=classified["probabilities"])
                hit_group_class.save()
        except IOError:
            # We do not want make hit group details page unavailable when
            # classifier file does not exist.
            pass

    if hit_group_class is not None:
        hit_group_class_label = NaiveBayesClassifier.label(hit_group_class.classes)
    else:
        hit_group_class_label = NaiveBayesClassifier.label()

    params = {
        'multichart': False,
        'columns': HIT_DETAILS_COLUMNS,
        'title': '#Hits',
        'class': hit_group_class_label,
    }

    def hit_group_details_data_formater(input):
        for cc in input:
            yield {
                'date': cc['start_time'],
                'row': (str(cc['hits_available']),),
            }

    dicts = query_to_dicts(
                """ select start_time, hits_available from hits_mv
                    where group_id = '{}' order by start_time asc """
                .format(hit_group_id))
    data = hit_group_details_data_formater(dicts)
    params['date_from'] = hit_group.occurrence_date
    params['date_to'] = datetime.datetime.utcnow()
    params['data'] = data
    params['hit_group'] = hit_group
    return direct_to_template(request, 'main/hit_group_details.html', params)