def get(self, request, format=None):
    if self.request.user is None or self.request.user.id is None \
            or not User.objects.filter(id=self.request.user.id).exists():
        return Response("Please login first", status=status.HTTP_400_BAD_REQUEST)

    user = User.objects.get(id=self.request.user.id)
    partner = user.pair.first().get_partner(user) if user.pair.exists() else None

    # An explicit partner may be requested, as long as they belong to the same experiment.
    partner_username = request.query_params.get('partner')
    if partner_username and User.objects.filter(username=partner_username).exists():
        partner = User.objects.filter(username=partner_username).first()
        if partner.experiment.first() != user.experiment.first():
            return Response("The specified partner does not belong to this experiment",
                            status=status.HTTP_400_BAD_REQUEST)

    dictionary = user.experiment.first().dictionary if user.experiment.exists() \
        else user.pair.first().assignment.experiment.dictionary

    feature_sources = request.query_params.get('feature_source', "system user partner").split(" ")
    feature_num = int(request.query_params.get('feature_num', 30))

    # Maps requested source names to query sources, and feature sources back to their labels.
    source_map = {
        "system": "system",
        "user": user,
        "partner": partner,
        partner: "partner",
        user: "user",
        None: "system",
    }

    source_list = [source_map[feature_source] for feature_source in feature_sources]

    try:
        all_distributions = []
        for source in source_list:
            features = dictionary.get_feature_list([source])

            distributions = []
            distribution_map = {}
            for feature in features:
                item_source = feature.source if hasattr(feature, 'source') else "system"
                item = {
                    "feature_id": feature.id,
                    "feature_index": feature.index,
                    "feature_text": feature.text,
                    "source": source_map[item_source],
                    "distribution": {},
                    "normalized_distribution": {},
                    "total_count": 0,
                    "entropy": None,
                }
                if feature.origin:
                    item["origin_message_id"] = feature.origin.id
                    code = feature.get_origin_message_code()
                    if code:
                        item["origin_code_id"] = code.id

                item = AttributeDict(item)
                # Start every code at zero so the distribution covers the full code set.
                for code in corpus_models.Code.objects.all():
                    item["distribution"][code.text] = 0
                distributions.append(item)
                distribution_map[feature.index] = item

            # Count this user's valid, user-labeled code assignments per feature and code.
            counts = features.filter(messages__code_assignments__isnull=False,
                                     messages__code_assignments__source=user,
                                     messages__code_assignments__is_user_labeled=True,
                                     messages__code_assignments__valid=True) \
                .values('index', 'text',
                        'messages__code_assignments__code__id',
                        'messages__code_assignments__code__text') \
                .annotate(count=Count('messages')).order_by('id', 'count').all()

            for count in counts:
                count = AttributeDict(count)
                distribution_map[count.index]["distribution"][count.messages__code_assignments__code__text] = count.count

            for item in distributions:
                item["total_count"] = sum(item.distribution.values())
                for code in item.distribution:
                    if item["total_count"] > 0:
                        item.normalized_distribution[code] = float(item.distribution[code]) / float(item["total_count"])
                    else:
                        item.normalized_distribution[code] = 0
                item["entropy"] = entropy(item.distribution)

            # First sort by total count (descending)...
            distributions.sort(key=attrgetter("total_count"), reverse=True)
            # ...then by entropy; because list.sort is stable, the final order is
            # equivalent to sorting by the key (entropy, -total_count).
            distributions.sort(key=attrgetter("entropy"))

            all_distributions = all_distributions + distributions[:feature_num]

        output = serializers.FeatureCodeDistributionSerializer(all_distributions, many=True)
        return Response(output.data, status=status.HTTP_200_OK)

    except Exception:
        import traceback
        traceback.print_exc()
        return Response(status=status.HTTP_400_BAD_REQUEST)
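# The two-pass sort in get() relies on list.sort being stable. A minimal standalone sketch
# (the names below are illustrative only, not part of this module) showing that sorting by
# total_count descending and then by entropy ascending is equivalent to a single sort by the
# key (entropy, -total_count):
from operator import attrgetter as _attrgetter
from types import SimpleNamespace as _NS

_items = [
    _NS(entropy=0.5, total_count=3),
    _NS(entropy=0.0, total_count=1),
    _NS(entropy=0.0, total_count=4),
]
_items.sort(key=_attrgetter("total_count"), reverse=True)  # secondary key: total_count desc
_items.sort(key=_attrgetter("entropy"))                    # primary key: entropy asc (stable)
assert [(i.entropy, i.total_count) for i in _items] == [(0.0, 4), (0.0, 1), (0.5, 3)]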
def post(self, request, format=None):
    if self.request.user is None or self.request.user.id is None \
            or not User.objects.filter(id=self.request.user.id).exists():
        return Response("Please login first", status=status.HTTP_400_BAD_REQUEST)

    user = User.objects.get(id=self.request.user.id)
    dictionary = user.experiment.first().dictionary if user.experiment.exists() \
        else user.pair.first().assignment.experiment.dictionary

    input = serializers.FeatureSerializer(data=request.data)
    if input.is_valid():
        data = input.validated_data
        token_list = data["token_list"]
        origin = data["origin"]

        # Add the user-defined feature to the dictionary.
        feature = dictionary.add_feature(token_list, source=user, origin=origin)

        item = {
            "feature_id": feature.id,
            "feature_index": feature.index,
            "feature_text": feature.text,
            "source": "user",
            "distribution": {},
            "normalized_distribution": {},
            "total_count": 0,
            "entropy": None,
        }
        if feature.origin:
            item["origin_message_id"] = feature.origin.id
            code = feature.get_origin_message_code()
            if code:
                item["origin_code_id"] = code.id

        item = AttributeDict(item)
        # Start every code at zero so the distribution covers the full code set.
        for code in corpus_models.Code.objects.all():
            item["distribution"][code.text] = 0

        # Count this user's valid, user-labeled code assignments for the new feature.
        counts = enhance_models.Feature.objects \
            .filter(id=feature.id,
                    messages__code_assignments__isnull=False,
                    messages__code_assignments__is_user_labeled=True,
                    messages__code_assignments__source=user,
                    messages__code_assignments__valid=True) \
            .values('index', 'text',
                    'messages__code_assignments__code__id',
                    'messages__code_assignments__code__text') \
            .annotate(count=Count('messages')).order_by('id', 'count').all()

        for count in counts:
            count = AttributeDict(count)
            item["distribution"][count.messages__code_assignments__code__text] = count.count

        item["total_count"] = sum(item.distribution.values())
        for code in item.distribution:
            if item["total_count"] > 0:
                item.normalized_distribution[code] = float(item.distribution[code]) / float(item["total_count"])
            else:
                item.normalized_distribution[code] = 0
        item["entropy"] = entropy(item.distribution)

        output = serializers.FeatureCodeDistributionSerializer(item)
        return Response(output.data, status=status.HTTP_200_OK)

    return Response(input.errors, status=status.HTTP_400_BAD_REQUEST)
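# A hypothetical client-side sketch of exercising the post() endpoint above. The URL path and
# the authentication step are assumptions (they depend on the project's urls.py and auth
# setup); the payload field names come from the FeatureSerializer usage above, and "origin"
# is assumed to be the id of the message the feature was extracted from.
if __name__ == "__main__":
    import requests

    session = requests.Session()
    # ... authenticate the session first; anonymous requests are rejected with HTTP 400 ...
    payload = {
        "token_list": ["health", "care"],  # tokens making up the new user-defined feature
        "origin": 42,                      # assumed: id of the originating message
    }
    resp = session.post("http://localhost:8000/api/dictionary/feature/", json=payload)
    print(resp.status_code)
    print(resp.json())  # on success: the serialized code distribution for the new feature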