Exemple #1
0
    def get(self, request, format=None):
        """Return code distributions for the top features of each requested source.

        Query parameters:
            partner: optional username. When it names an existing user, that
                user replaces the partner derived from the requester's pair;
                the request is rejected if their first experiment differs
                from the requester's.
            feature_source: whitespace-separated names drawn from "system",
                "user" and "partner" (default: all three).
            feature_num: maximum number of features kept per source
                (default 30); must be a non-negative integer.

        Returns:
            200 with the serialized list of per-feature distributions, or 400
            when the requester is not logged in, a parameter is invalid, or
            building the distributions fails.
        """
        # Resolve the requester with a single query instead of exists() + get().
        user_id = getattr(self.request.user, "id", None)
        user = User.objects.filter(id=user_id).first() if user_id is not None else None
        if user is None:
            return Response("Please login first", status=status.HTTP_400_BAD_REQUEST)

        user_experiment = user.experiment.first()
        pair = user.pair.first()
        partner = pair.get_partner(user) if pair is not None else None

        partner_username = request.query_params.get('partner')
        if partner_username:
            requested_partner = User.objects.filter(username=partner_username).first()
            # An unknown username is ignored and the pair-derived partner is kept.
            if requested_partner is not None:
                partner = requested_partner
                if partner.experiment.first() != user_experiment:
                    return Response("The specified partner does not belong to this experiment",
                                    status=status.HTTP_400_BAD_REQUEST)

        if user_experiment is not None:
            dictionary = user_experiment.dictionary
        else:
            dictionary = pair.assignment.experiment.dictionary

        requested_sources = request.query_params.get('feature_source', "system user partner").split()
        try:
            feature_num = int(request.query_params.get('feature_num', 30))
        except (TypeError, ValueError):
            return Response("feature_num must be an integer", status=status.HTTP_400_BAD_REQUEST)
        if feature_num < 0:
            # A negative value would silently slice from the end of the ranking.
            return Response("feature_num must not be negative", status=status.HTTP_400_BAD_REQUEST)

        # Request token -> object handed to the dictionary as the feature source.
        source_lookup = {
            "system": "system",
            "user": user,
            "partner": partner,
        }
        # Feature source -> label emitted in the response. Later keys win on
        # collisions (e.g. partner is None), matching the original single dict.
        # NOTE(review): the user label previously read "******", which looks
        # like a scrubbed literal; "user" matches what post() emits - confirm.
        source_labels = {
            "system": "system",
            partner: "partner",
            user: "user",
            None: "system",
        }

        unknown_sources = [name for name in requested_sources if name not in source_lookup]
        if unknown_sources:
            return Response("Unknown feature source: %s" % ", ".join(unknown_sources),
                            status=status.HTTP_400_BAD_REQUEST)
        source_list = [source_lookup[name] for name in requested_sources]

        try:
            # The set of codes does not depend on the feature; fetch it once.
            code_texts = [code.text for code in corpus_models.Code.objects.all()]

            all_distributions = []
            for source in source_list:
                features = dictionary.get_feature_list([source])

                distributions = []
                distribution_map = {}

                for feature in features:
                    feature_source = feature.source if hasattr(feature, 'source') else "system"

                    item = {
                        "feature_id": feature.id,
                        "feature_index": feature.index,
                        "feature_text": feature.text,
                        "source": source_labels[feature_source],
                        "distribution": {},
                        "normalized_distribution": {},
                        "total_count": 0,
                        "entropy": None
                    }

                    if feature.origin:
                        item["origin_message_id"] = feature.origin.id
                        origin_code = feature.get_origin_message_code()
                        if origin_code:
                            item["origin_code_id"] = origin_code.id
                    item = AttributeDict(item)
                    # Every code starts at zero so unseen codes still appear.
                    for code_text in code_texts:
                        item["distribution"][code_text] = 0
                    distributions.append(item)
                    distribution_map[feature.index] = item

                # Per (feature, code) message counts, restricted to the
                # requester's own valid, user-labeled assignments.
                counts = features.filter(messages__code_assignments__isnull=False,
                                         messages__code_assignments__source=user,
                                         messages__code_assignments__is_user_labeled=True,
                                         messages__code_assignments__valid=True)\
                    .values('index', 'text', 'messages__code_assignments__code__id', 'messages__code_assignments__code__text')\
                    .annotate(count=Count('messages')).order_by('id', 'count').all()
                for count in counts:
                    count = AttributeDict(count)
                    distribution_map[count.index]["distribution"][count.messages__code_assignments__code__text] = count.count

                for item in distributions:
                    total = sum(item.distribution.values())
                    item["total_count"] = total

                    for code_text, tally in item.distribution.items():
                        if total > 0:
                            item.normalized_distribution[code_text] = float(tally) / float(total)
                        else:
                            item.normalized_distribution[code_text] = 0

                    item["entropy"] = entropy(item.distribution)

                # Two stable sorts: the result is ordered by
                # (entropy ascending, total_count descending).
                distributions.sort(key=attrgetter("total_count"), reverse=True)
                distributions.sort(key=attrgetter("entropy"))

                all_distributions.extend(distributions[:feature_num])

            output = serializers.FeatureCodeDistributionSerializer(all_distributions, many=True)

            return Response(output.data, status=status.HTTP_200_OK)
        except Exception:
            # Report the failure without dropping into an interactive debugger,
            # which would block the worker serving this request.
            import traceback
            traceback.print_exc()

            return Response(status=status.HTTP_400_BAD_REQUEST)
Exemple #2
0
    def post(self, request, format=None):
        """Add a feature on behalf of the requester and return its code distribution.

        The request body is validated with ``FeatureSerializer``; its
        ``token_list`` and ``origin`` are passed to the dictionary's
        ``add_feature`` with the requester as the source.

        Returns:
            200 with the serialized distribution of the new feature, or 400
            with either a login prompt or the serializer's validation errors.
        """
        requester = self.request.user
        logged_in = (
            requester is not None
            and requester.id is not None
            and User.objects.filter(id=requester.id).exists()
        )
        if not logged_in:
            return Response("Please login first", status=status.HTTP_400_BAD_REQUEST)

        user = User.objects.get(id=self.request.user.id)
        if user.experiment.exists():
            dictionary = user.experiment.first().dictionary
        else:
            dictionary = user.pair.first().assignment.experiment.dictionary

        feature_form = serializers.FeatureSerializer(data=request.data)
        if not feature_form.is_valid():
            return Response(feature_form.errors, status=status.HTTP_400_BAD_REQUEST)

        payload = feature_form.validated_data
        tokens = payload["token_list"]
        origin_message = payload["origin"]
        feature = dictionary.add_feature(tokens, source=user, origin=origin_message)

        record = {
            "feature_id": feature.id,
            "feature_index": feature.index,
            "feature_text": feature.text,
            "source": "user",
            "distribution": {},
            "normalized_distribution": {},
            "total_count": 0,
            "entropy": None,
        }
        if feature.origin:
            record["origin_message_id"] = feature.origin.id
            origin_code = feature.get_origin_message_code()
            if origin_code:
                record["origin_code_id"] = origin_code.id
        record = AttributeDict(record)

        # Start every known code at zero so codes without messages still appear.
        for known_code in corpus_models.Code.objects.all():
            record["distribution"][known_code.text] = 0

        # Message counts per code for the new feature, limited to the
        # requester's own valid, user-labeled assignments.
        labeled = enhance_models.Feature.objects.filter(
            id=feature.id,
            messages__code_assignments__isnull=False,
            messages__code_assignments__is_user_labeled=True,
            messages__code_assignments__source=user,
            messages__code_assignments__valid=True,
        )
        per_code = labeled.values(
            'index',
            'text',
            'messages__code_assignments__code__id',
            'messages__code_assignments__code__text',
        )
        tallies = per_code.annotate(count=Count('messages')).order_by('id', 'count').all()
        for row in tallies:
            row = AttributeDict(row)
            record["distribution"][row.messages__code_assignments__code__text] = row.count

        total = sum(record.distribution[label] for label in record.distribution)
        record["total_count"] = total
        for label in record.distribution:
            if total > 0:
                share = float(record.distribution[label]) / float(total)
            else:
                share = 0
            record.normalized_distribution[label] = share
        record["entropy"] = entropy(record.distribution)

        output = serializers.FeatureCodeDistributionSerializer(record)
        return Response(output.data, status=status.HTTP_200_OK)