Exemple #1
0
class TaskSerializer_Simplify(serializers.ModelSerializer):
    """Minimal representation of a Tasks row: id, type, user and project."""

    # Both nested serializers are declared read-only, so they only take part
    # in output rendering.
    user = DjangoUserSerializer_Simplify(many=False, read_only=True)
    project = ProjectSerializer_Simplify(many=False, read_only=True)

    # def get_project(self, obj):
    #   return obj.id + " TODO change into project simple_serializer"

    class Meta:
        model = Tasks
        fields = ('id', 'type', 'user', 'project')
class PassageSerializer(serializers.ModelSerializer):
    """Serialize Passages rows and handle their creation and update.

    ``created_by`` is rendered read-only; ``source`` is a nested serializer
    whose id is resolved against the Sources table on write.
    """
    created_by = DjangoUserSerializer_Simplify(many=False, read_only=True)
    source = SourceSerializer_Simplify(many=False)

    class Meta:
        model = Passages
        fields = ('id', 'text', 'type', 'source', 'text_direction',
                  'external_id', 'is_active', 'created_by', 'created_at',
                  'updated_at')

    def create(self, validated_data):
        """Create one passage per ``<DELIMITER>``-separated chunk of the text.

        The owner is taken from ``initial_data['created_by']`` and the source
        is looked up by the id found in ``initial_data['source']``.

        Returns:
            The last passage created (the only one when the text contains no
            delimiter).
        """
        validated_data['created_by'] = self.initial_data['created_by']

        source = get_object_or_404(
            Sources,
            pk=get_value_or_none('id', self.initial_data['source']))
        active_obj_or_raise_exeption(source)
        validated_data['source'] = source

        # str.split with an explicit separator always yields at least one
        # item, so new_passage is always bound when the loop ends.
        new_passage = None
        for text in validated_data['text'].split("<DELIMITER>"):
            validated_data['text'] = text
            new_passage = Passages.objects.create(**validated_data)

        return new_passage

    def update(self, instance, validated_data):
        """Update a passage in place and return it.

        ``source`` and ``text`` are only changed while no task references the
        passage; the remaining attributes can always be updated.
        """
        # The nested source payload is resolved from initial_data below; a
        # partial update may legitimately omit it.
        validated_data.pop('source', None)

        # prevent updating an asset that is used by another asset
        if not self.is_used_in_a_task(instance):
            source_payload = self.initial_data.get('source')
            if source_payload is not None:
                instance.source = get_object_or_404(
                    Sources, pk=get_value_or_none('id', source_payload))
            instance.text = validated_data.get('text', instance.text)

        instance.text_direction = validated_data.get('text_direction',
                                                     instance.text_direction)
        instance.type = validated_data.get('type', instance.type)
        instance.is_active = validated_data.get('is_active',
                                                instance.is_active)
        # Fall back to the current external_id (the previous code fell back
        # to is_active, overwriting the id with a boolean).
        instance.external_id = validated_data.get('external_id',
                                                  instance.external_id)
        instance.save()
        return instance

    def is_used_in_a_task(self, instance):
        """Return True when at least one task references this passage."""
        return Tasks.objects.filter(passage=instance.id).exists()
Exemple #3
0
class SourceSerializer(serializers.ModelSerializer):
    """Serializer for Sources rows; ``created_by`` is rendered read-only."""
    created_by = DjangoUserSerializer_Simplify(read_only=True, many=False)

    class Meta:
        model = Sources
        fields = (
            'id', 'name', 'text', 'is_active',
            'created_by', 'created_at', 'updated_at',
        )

    def create(self, validated_data):
        """Create a source owned by ``initial_data['created_by']``."""
        creator = self.initial_data['created_by']
        validated_data.update(created_by=creator)
        new_source = Sources.objects.create(**validated_data)
        return new_source
Exemple #4
0
class CategorySerializer(serializers.ModelSerializer):
    """Serializer for Categories rows; ``created_by`` is rendered read-only."""
    created_by = DjangoUserSerializer_Simplify(read_only=True, many=False)

    class Meta:
        model = Categories
        fields = (
            'id', 'name', 'description', 'abbreviation', 'tooltip',
            'is_default', 'is_active',
            'created_by', 'created_at', 'updated_at',
        )

    def create(self, validated_data):
        """Create a category owned by ``initial_data['created_by']``."""
        creator = self.initial_data['created_by']
        validated_data.update(created_by=creator)
        new_category = Categories.objects.create(**validated_data)
        return new_category
Exemple #5
0
 def get_user(self,obj):
     """Return the simplified serialization of the task's annotator."""
     annotator_serializer = DjangoUserSerializer_Simplify(obj.annotator)
     return annotator_serializer.data
Exemple #6
0
class TaskInChartSerializer(serializers.ModelSerializer):
    """Serializer for Tasks rows that also creates and updates tasks.

    ``created_by``, ``passage`` and ``project`` are nested read-only
    serializers; ``user``, ``parent`` and ``children`` are computed by the
    matching ``get_*`` methods.
    """
    created_by = DjangoUserSerializer_Simplify(many=False, read_only=True)
    passage = PassageSerializer_Simplify(many=False, read_only=True,allow_null=True)
    project = ProjectSerializer_Simplify(many=False, read_only=True,allow_null=False)
    # Computed fields, resolved through get_user / get_parent / get_children.
    user = serializers.SerializerMethodField()
    parent = serializers.SerializerMethodField()
    children = serializers.SerializerMethodField()
    #out_of_date = serializers.BooleanField(source='out_of_date')
    #out_of_date = serializers.SerializerMethodField() # Added Sep 30
    
    def get_user(self,obj):
        """Return the simplified serialization of the task's annotator."""
        annotator_serializer = DjangoUserSerializer_Simplify(obj.annotator)
        return annotator_serializer.data

    def get_parent(self,obj):
        """Return the simplified parent task, or None for a task without one."""
        if obj.parent_task is None:
            return None
        return TaskSerializer_Simplify(obj.parent_task).data

    def get_children(self, obj):
        """Return the simplified serialization of every direct child task."""
        child_tasks = Tasks.objects.all().filter(parent_task_id=obj.id)
        return [TaskSerializer_Simplify(child).data for child in child_tasks]
    
    """
    def get_out_of_date(self,obj):
        if obj.project.layer.type != Constants.LAYER_TYPES_JSON['ROOT'] and obj.parent_task is not None:
            num_of_submitted_review_tasks = \
                Tasks.objects.all().filter(parent_task_id=obj.parent_task.id,\
                                           type=Constants.TASK_TYPES_JSON['REVIEW'],status=Constants.TASK_STATUS_JSON['SUBMITTED']).count()
            return (num_of_submitted_review_tasks > 0)
        else:
            return False
    """
        
    class Meta:
        # Model backing the serializer and the fields it exposes.
        model = Tasks
        fields = (
            'id',
            'parent',
            'children',
            'type',
            'status',
            'project',
            'user',
            'passage',
            'is_demo',
            'manager_comment',
            'user_comment',
            # NOTE(review): no serializer field named 'out_of_date' is
            # declared in this class (its declarations are commented out);
            # presumably it resolves against the Tasks model - verify.
            'out_of_date',
            'obsoleted_by',
            'parent_obsoleted_by',
            'is_active',
            'created_by',
            'created_at',
            'updated_at'
        )

    def create(self, validated_data):
        """Build a new task from the request payload and persist it.

        The owner, project, annotator, parent and passage are resolved from
        ``initial_data``; the scalar attributes come from ``validated_data``.
        Tokenization tasks are saved and tokenized immediately; annotation
        and review tasks are saved according to the type of their layer.

        Raises:
            CreateAnnotationTaskDeniedException: when a non-tokenization task
                is requested without a parent task (its passage is inherited
                from the parent), or when the parent's layer does not match.
        """
        ownerUser = self.initial_data['created_by']
        validated_data['created_by'] = ownerUser
        project = get_object_or_404(Projects, pk=self.initial_data['project']['id'])
        # todo: when coarsening layer task - no need of annotator - check which one to set default?
        annotator = get_object_or_404(
            Users,
            pk=get_value_or_none('id', get_value_or_none('user', self.initial_data)))

        parent = None
        parent_payload = self.initial_data.get('parent')
        if parent_payload:
            parent = get_object_or_404(Tasks, pk=get_value_or_none('id', parent_payload))
            active_obj_or_raise_exeption(parent)

        newTask = Tasks()
        newTask.created_by = ownerUser
        newTask.parent_task = parent
        newTask.status = Constants.TASK_STATUS_JSON['NOT_STARTED']
        newTask.type = validated_data['type']
        newTask.is_demo = validated_data['is_demo']
        newTask.manager_comment = validated_data['manager_comment']
        newTask.user_comment = validated_data.get('user_comment', '')

        # Omri Abend (Sep 13)
        # tasks cannot be created with is_active=True if their parent is not submitted
        if newTask.parent_task and newTask.parent_task.status != Constants.TASK_STATUS_JSON['SUBMITTED']:
            newTask.is_active = False
        else:
            newTask.is_active = validated_data['is_active']

        newTask.project = project
        newTask.annotator = annotator

        is_tokenization = newTask.type == Constants.TASK_TYPES_JSON['TOKENIZATION']
        if is_tokenization:
            passage = get_object_or_404(
                Passages,
                pk=get_value_or_none('id', get_value_or_none('passage', self.initial_data)))
        else:
            # The passage is inherited from the parent, so a missing parent
            # is a denied request rather than an AttributeError on None.
            if parent is None:
                raise CreateAnnotationTaskDeniedException
            passage = self.get_passage_by_parent_task(parent)

        newTask.passage = passage
        newTask.created_at = timezone.now()

        active_obj_or_raise_exeption(project)
        active_obj_or_raise_exeption(annotator)
        active_obj_or_raise_exeption(passage)

        if is_tokenization:
            newTask.save()
            self.generate_and_save_tokens(newTask)
        elif newTask.type in (Constants.TASK_TYPES_JSON['ANNOTATION'],
                              Constants.TASK_TYPES_JSON['REVIEW']):
            # Both checks raise on failure, so reaching the save means they passed.
            if self.has_parent_task(newTask) and self.parent_task_layer_is_my_parent_layer(newTask):
                self.save_task_by_layer_type(newTask)

        return newTask

    def update(self, instance, validated_data):
        """Update a task while keeping its type unchanged.

        Raises:
            OnlyActiveIfParentIsActiveException: when the update tries to
                activate a task whose parent task is not submitted.
        """
        # avoid changing the task's type
        validated_data['type'] = instance.type

        # Omri Abend (Sep 13)
        # tasks cannot be updated to is_active=True if their parent is not submitted
        # .get() keeps partial updates that omit is_active from raising KeyError.
        if (validated_data.get('is_active') and instance.parent_task
                and instance.parent_task.status != Constants.TASK_STATUS_JSON['SUBMITTED']):
            raise OnlyActiveIfParentIsActiveException

        # allow update only status is_demo is_active manager_comment

        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this serializer is subclassed.
        return super(TaskInChartSerializer, self).update(instance, validated_data)


    def get_passage_by_parent_task(self,parent_task):
        """Return the passage attached to ``parent_task``."""
        inherited_passage = parent_task.passage
        return inherited_passage

    def is_parent_task_submitted(self,task):
        """Return True when the parent task is submitted, otherwise raise.

        Raises:
            CreateDerivedAnnotationTaskDeniedException: when the parent task's
                status is not SUBMITTED.
        """
        submitted = Constants.TASK_STATUS_JSON['SUBMITTED']
        if task.parent_task.status != submitted:
            raise CreateDerivedAnnotationTaskDeniedException
        return True


    def parent_task_layer_is_my_parent_layer(self,task):
        """Check that a derived-layer task hangs off a task of its parent layer.

        Returns:
            True when the task's layer has no parent layer, or when the parent
            task's layer is that parent layer.

        Raises:
            CreateAnnotationTaskDeniedException: when the layer is derived and
                the parent task belongs to a different layer.
        """
        layer = task.project.layer
        if layer.parent_layer_id is None:
            # Not a derived layer: nothing to verify.
            return True

        parent_task_layer_id = task.parent_task.project.layer.id
        my_parent_layer_id = layer.parent_layer_id.id
        print("my_parent_layer_id: "+str(my_parent_layer_id)+" ; parent_task_layer_id: "+str(parent_task_layer_id))
        if my_parent_layer_id != parent_task_layer_id:
            raise CreateAnnotationTaskDeniedException
        return True


    def has_parent_task(self,task):
        """Return True when ``task`` has a parent task, otherwise raise.

        Raises:
            CreateAnnotationTaskDeniedException: when the task has no
                ``parent_task`` attribute or it is None.
        """
        if getattr(task, 'parent_task', None) is None:
            raise CreateAnnotationTaskDeniedException
        return True


    def is_parent_of_other_tasks(self, instance):
        """Return True when at least one task has ``instance`` as its parent."""
        # exists() asks the database for a single row instead of loading
        # every child task just to count them.
        return Tasks.objects.filter(parent_task_id=instance.id).exists()

    def generate_and_save_tokens(self,taskInstance):
        """Tokenize the task's passage text and replace the task's tokens.

        Literal backslash-n sequences in the passage text are turned into real
        newlines before tokenizing; existing tokens of the task are deleted
        first.
        """
        passage_text = taskInstance.passage.text.replace('\\n','\n')
        token_dicts = tokenize(passage_text)
        taskInstance.tokens_set.all().delete()
        for token_dict in token_dicts:
            token_row = Tokens()
            token_row.require_annotation = not token_dict['is_punctuation']
            token_row.text = token_dict['text']
            token_row.start_index = token_dict['start_index']
            token_row.end_index = token_dict['end_index']
            taskInstance.tokens_set.add(token_row, bulk=False)

    def save_task_by_layer_type(self,task):
        """Persist ``task`` according to the type of its project's layer.

        ROOT, EXTENSION and REFINEMENT layers simply save the task;
        COARSENING layers go through ``save_coarsening_task_process``. Any
        other layer type leaves the task unsaved. The task is returned in
        every case.
        """
        task_layer = task.project.layer
        for label in ('ROOT', 'EXTENSION', 'REFINEMENT'):
            if task_layer.type == Constants.LAYER_TYPES_JSON[label]:
                print('save_task_by_layer_type - ' + label + ' - start')
                task.save()
                print('save_task_by_layer_type - ' + label + ' - end')
                break
        else:
            # None of the plain-save layer types matched.
            if task_layer.type == Constants.LAYER_TYPES_JSON['COARSENING']:
                self.save_coarsening_task_process(task)

        return task


    def save_coarsening_task_process(self,task):
        """Save a coarsening-layer task by copying its parent's annotation.

        The task is saved as ONGOING, every annotation unit of the parent task
        is duplicated into it (with its categories plus the coarsened
        categories of the task's layer), remote units are re-linked between
        the copies, and the task is finally saved as SUBMITTED.

        Raises:
            CreateCoarseningAnnotationTaskDeniedException: when the parent
                task is a tokenization task.
        """
        print('save_task_by_layer_type - COARSENING - start')
        # make sure that the parent task is not tokenization task
        if (task.parent_task.type == Constants.TASK_TYPES_JSON['TOKENIZATION']):
            raise CreateCoarseningAnnotationTaskDeniedException
        else:
            # set the task status to ONGOING
            task.status = Constants.TASK_STATUS_JSON['ONGOING']

            # save the task
            task.save()

            # e.g. there are 3 categories that become 1 category so:
            parent_task_annotation_units = task.parent_task.annotation_units_set.all().order_by('id')
            # mapping {parent category id: coarsened category id} for this layer
            coarsenned_categories = self.get_coarsenned_categories(task.project.layer)
            remote_units_array = []
            # go over all of the annotation units in this task
            # - for each unit, if one of the old categories exists - replace it with the new category
            for parent_au in parent_task_annotation_units:
                # from here on parent_au is the serialized (dict-like) form of the unit
                parent_au = Annotation_UnitsSerializer(parent_au).data
                annotation_unit = Annotation_Units()
                annotation_unit.tree_id = parent_au['tree_id']
                # NOTE(review): this points at the parent unit's task; the
                # annotation_units_set.add() call below presumably re-points
                # the unit at the new task - confirm.
                annotation_unit.task_id = Tasks.objects.get(id=parent_au['task_id'])
                annotation_unit.type = parent_au['type']
                annotation_unit.comment = parent_au['comment']
                annotation_unit.gui_status = parent_au['gui_status']
                annotation_unit.is_remote_copy = parent_au['is_remote_copy']

                # the parent is looked up among the units already copied into this task
                parent_annotation_unit = self.get_parent_annotation_unit_or_none(parent_au['parent_id'],task.id)

                annotation_unit.parent_id = parent_annotation_unit

                task.annotation_units_set.add(annotation_unit, bulk=False)
                self.save_coarsening_annotation_categories(annotation_unit, parent_au, coarsenned_categories)

                # check if i have a remote units
                remote_units = Annotation_Remote_Units_Annotation_Units.objects.all().filter(unit_id=parent_au['id'])
                for ru in remote_units:
                    # retrieve its original unit
                    remote_original_unit = Annotation_Units.objects.get(id=ru.remote_unit_id.id, task_id=task.parent_task.id)

                    # the copy of that unit inside the coarsening task (matched by tree_id)
                    remote_original_unit_in_coarsening_task = Annotation_Units.objects.get(
                        tree_id=remote_original_unit.tree_id, task_id=task.id)

                    # the unit holding the remote, in the parent task ...
                    unit_id_remote_original_unit = Annotation_Units.objects.get(
                        id=ru.unit_id.id, task_id=task.parent_task.id)

                    # ... and its copy inside the coarsening task
                    unit_id_remote_original_unit_in_coarsening_task = Annotation_Units.objects.get(
                        tree_id=unit_id_remote_original_unit.tree_id, task_id=task.id)

                    # set the remote is_remote_copy = true
                    remote_original_unit_in_coarsening_task.is_remote_copy = True

                    # set the parent_id to be the remote's one
                    remote_original_unit_in_coarsening_task.parent_id = unit_id_remote_original_unit_in_coarsening_task

                    # get its original categories
                    originial_remote_categories = Annotation_Units_Categories.objects.all().filter(
                        unit_id=remote_original_unit,remote_parent_id=unit_id_remote_original_unit)

                    # set the remote categories to the coarsening task
                    remote_categories = []
                    for cat in originial_remote_categories:
                        annotation_unit_categories = Annotation_Units_Categories()
                        annotation_unit_categories.unit_id = remote_original_unit_in_coarsening_task
                        annotation_unit_categories.remote_parent_id = unit_id_remote_original_unit_in_coarsening_task
                        annotation_unit_categories.category_id = cat.category_id
                        annotation_unit_categories.save()
                        remote_categories.append(annotation_unit_categories)

                        # TODO: save coarsened category to remote units in coarsening task
                        if cat.remote_parent_id != None:
                            # if the category is the coarsened one, add it to the annotation unit
                            coarsend_category = self.get_coarsening_layer_category_or_none(coarsenned_categories, {'id':cat.category_id.id})
                            if coarsend_category is not None:
                                coarsend_unit_category = Annotation_Units_Categories()
                                coarsend_unit_category.unit_id = remote_original_unit_in_coarsening_task
                                coarsend_unit_category.category_id = coarsend_category
                                coarsend_unit_category.remote_parent_id = unit_id_remote_original_unit_in_coarsening_task
                                # NOTE(review): the bare except hides every
                                # save failure, not only duplicates.
                                try:
                                  coarsend_unit_category.save()
                                except:
                                  print("already saved")

                    # stash the copied categories on the instance for the second pass below
                    remote_original_unit_in_coarsening_task.remote_categories = remote_categories

                    # add the remote original unit to the json output
                    remote_units_array.append(remote_original_unit_in_coarsening_task)

            # second pass: persist the remote-unit links and their categories
            for annotation_remote_unit in remote_units_array:
                remote_unit = self.save_coarsening_annotation_remote_unit(annotation_remote_unit)
                self.save_coarsening_remote_annotation_categories(remote_unit, annotation_remote_unit.remote_categories)

            # set the task status to SUBMITTED
            task.status = Constants.TASK_STATUS_JSON['SUBMITTED']
            task.save()

        print('save_task_by_layer_type - COARSENING - end')

    def save_coarsening_remote_annotation_categories(self,remote_annotation_unit,categories):
        """Attach each category in ``categories`` to a remote unit link.

        One Annotation_Units_Categories row is saved per category, pointing at
        the link's remote unit with the link's holding unit as remote parent.
        Saving is best effort: a failing row is reported and skipped.
        """
        print('save_remote_annotation_categories - start')
        for cat in categories:
            unit_category = Annotation_Units_Categories()
            unit_category.unit_id = remote_annotation_unit.remote_unit_id
            unit_category.category_id = cat.category_id
            unit_category.remote_parent_id = remote_annotation_unit.unit_id
            try:
                unit_category.save()
            except Exception:
                # Best effort (typically a duplicate row); unlike a bare
                # except this lets KeyboardInterrupt/SystemExit through.
                print('already saved remote category')
        print('save_remote_annotation_categories - end')

    def save_coarsening_annotation_remote_unit(self, annotation_unit):
        """Save and return the remote-unit link for ``annotation_unit``.

        The link's ``unit_id`` is the unit's parent and its ``remote_unit_id``
        is the unit found by the same tree_id and task (404 when missing).
        """
        print("save_coarsening_annotation_remote_unit - start")
        link = Annotation_Remote_Units_Annotation_Units()
        # unit_id holds the parent side of the link
        link.unit_id = annotation_unit.parent_id
        # remote_unit_id holds the remote unit itself
        link.remote_unit_id = get_object_or_404(
            Annotation_Units,
            tree_id=annotation_unit.tree_id,
            task_id=annotation_unit.task_id)
        link.save()
        print("save_coarsening_annotation_remote_unit - end")
        return link

    def save_coarsening_annotation_categories(self,annotation_unit,parent_au,coarsenned_categories):
        """Copy the parent unit's categories onto ``annotation_unit``.

        Every category listed in ``parent_au['categories']`` is duplicated;
        when the category maps to a coarsened category, that one is added as
        well (best effort - a failing save is reported and skipped).
        """
        if parent_au['categories'] is None:
            return

        # add and duplicate all the parent's annotation_units categories
        for cat in parent_au['categories']:
            unit_category = Annotation_Units_Categories()
            unit_category.unit_id = annotation_unit
            unit_category.category_id = Categories.objects.get(id=cat['id'])
            unit_category.remote_parent_id = None  # TODO: can it be other than none ?
            unit_category.save()

            # if the category is the coarsened one, add it to the annotation unit
            coarsend_category = self.get_coarsening_layer_category_or_none(coarsenned_categories, cat)
            if coarsend_category is None:
                continue
            coarsend_unit_category = Annotation_Units_Categories()
            coarsend_unit_category.unit_id = annotation_unit
            coarsend_unit_category.category_id = coarsend_category
            coarsend_unit_category.remote_parent_id = None  # TODO: can it be other than none ?
            try:
                coarsend_unit_category.save()
            except Exception:
                # Best effort (typically a duplicate row); unlike a bare
                # except this lets KeyboardInterrupt/SystemExit through.
                print("already saved")

    def get_parent_annotation_unit_or_none(self,parent_au_id,task_id):
        """Return the unit with the given tree_id in the task, or None.

        Any lookup failure (no match, several matches, invalid id) yields
        None.
        """
        try:
            return Annotation_Units.objects.get(tree_id=parent_au_id, task_id=task_id)
        except Exception:
            # Kept broad to preserve the lookup-or-None contract, but no
            # longer swallows KeyboardInterrupt/SystemExit like a bare except.
            return None

    def get_coarsenned_categories(self,layer):
        """Return ``{parent_category_id: into_category_id}`` for ``layer``.

        The mapping is built from the Derived_Layers_Categories_Categories
        rows of the layer.
        """
        derived_rows = Derived_Layers_Categories_Categories.objects.all().filter(layer_id=layer.id)
        return {
            row.parent_category_id_id: row.category_id_id
            for row in derived_rows
        }

    def get_coarsening_layer_category_or_none(self,coarsenned_categories,cat):
        """Return the coarsened Categories row for ``cat``, or None.

        Args:
            coarsenned_categories: mapping of parent category id to the id of
                the category it is coarsened into.
            cat: mapping with an ``'id'`` key identifying the parent category.

        Returns:
            The target category, or None when ``cat`` has no coarsened
            counterpart (missing key, None target, or failed lookup).
        """
        try:
            target_id = coarsenned_categories[cat['id']]
            if target_id is None:
                # Previously this path left the result unbound and crashed
                # with UnboundLocalError.
                return None
            # TODO: check if the target category already exists in the parent unit
            return Categories.objects.get(id=target_id)
        except Exception:
            return None
class TaskSerializerAnnotator(serializers.ModelSerializer):
    """Serializer for Tasks rows that includes tokens and annotation units.

    ``created_by``, ``passage`` and ``project`` are nested read-only
    serializers; every SerializerMethodField below is computed by the
    matching ``get_*`` method of this class.
    """
    created_by = DjangoUserSerializer_Simplify(many=False, read_only=True)
    passage = PassageSerializer(many=False, read_only=True, allow_null=True)
    project = ProjectSerializerForAnnotator(many=False, read_only=True, allow_null=False)
    user = serializers.SerializerMethodField()
    parent = serializers.SerializerMethodField()
    children = serializers.SerializerMethodField()
    tokens = serializers.SerializerMethodField()
    annotation_units = serializers.SerializerMethodField() 
    # get_is_active raises for inactive tasks instead of returning False.
    is_active = serializers.SerializerMethodField()
    user_comment = serializers.SerializerMethodField()
    
    def get_is_active(self,obj):
        """Return the task's ``is_active`` flag, refusing inactive tasks.

        Raises:
            GetForInactiveTaskException: when the task is not active.
        """
        if obj.is_active:
            return obj.is_active
        raise GetForInactiveTaskException

    def get_user(self,obj):
        """Return the simplified serialization of the task's annotator."""
        annotator_serializer = DjangoUserSerializer_Simplify(obj.annotator)
        return annotator_serializer.data

    def get_parent(self,obj):
        """Return the serialized parent task, or None for a task without one."""
        if obj.parent_task is None:
            return None
        return TaskInChartSerializer(obj.parent_task).data

    def get_children(self, obj):
        """Return the serialization of every direct child task."""
        child_tasks = Tasks.objects.all().filter(parent_task_id=obj.id)
        return [TaskInChartSerializer(child).data for child in child_tasks]

    def get_user_comment(self, obj):
        """Return the comment to show for the task.

        A review task that has not been started shows its parent task's user
        comment; every other task shows its own.
        """
        is_fresh_review = (
            obj.status == Constants.TASK_STATUS_JSON['NOT_STARTED']
            and obj.type == Constants.TASK_TYPES_JSON['REVIEW']
        )
        source_task = obj.parent_task if is_fresh_review else obj
        return source_task.user_comment

    def get_tokens(self, obj):
        """Return the task's tokens, preferring the cached annotation JSON.

        For non-tokenization tasks with a stored annotation JSON, the tokens
        are the ``children_tokens`` of its first entry. When that yields
        nothing, the tokens are loaded through ``_get_tokens``.
        """
        tokens_data = None
        has_cached_annotation = (
            obj.annotation_json
            and obj.type != Constants.TASK_TYPES_JSON['TOKENIZATION']
        )
        if has_cached_annotation:
            tokens_data = json.loads(obj.annotation_json.annotation_json)
            logger.info(str(tokens_data))
            if len(tokens_data) > 0:
                tokens_data = tokens_data[0]['children_tokens']

        if tokens_data:
            return tokens_data

        # Nothing usable was cached. No Tokens_Json is written here: it only
        # serves as a cache for quick GETs of annotation tasks, and
        # tokenization tasks do not have one.
        logger.info("tokens_json not detected")
        return self._get_tokens(obj)

    def _get_tokens(self, obj):
        """Return the serialized tokens that apply to ``obj``.

        The task's own tokens are used when it has any (or when it is a
        tokenization task). Otherwise a review task takes the tokens of its
        nearest ancestor that has some, and any other task takes the tokens
        of the root of its parent chain. Tokens are ordered by start_index
        and serialized with their position as ``index_in_task``.
        """
        own_tokens = Tokens.objects.all().filter(task_id=obj.id).order_by('start_index')
        if own_tokens or obj.type == Constants.TASK_TYPES_JSON['TOKENIZATION']:
            tokens = own_tokens
        elif obj.type == Constants.TASK_TYPES_JSON['REVIEW']:
            tokens = []
            ancestor = obj
            # Stop at the top of the chain: without the None check a chain
            # with no tokens at all ended in AttributeError on None.
            while ancestor is not None and len(tokens) == 0:
                tokens = Tokens.objects.all().filter(task_id=ancestor).order_by("start_index")
                ancestor = ancestor.parent_task
        else:
            root_tokenization_task_id = self.get_root_task(obj)
            tokens = Tokens.objects.all().filter(task_id=root_tokenization_task_id).order_by("start_index")

        return [
            TokensSerializer(token, context={'index_in_task': index}).data
            for index, token in enumerate(tokens)
        ]



    def get_annotation_units(self, obj):
        """Return the task's annotation units as a list sorted by tree_id.

        When the task already has a stored annotation JSON it is decoded and
        returned. Otherwise the list is built from the database, stored as a
        new Annotation_Json attached to the task, and returned.
        """
        logger.info("get_annotation_units accessed")
        # **********************************
        #           AS ARRAY
        # **********************************

        def create_annotation(obj):
            # Build the serialized unit list from the database rows.
            # TODO: Place all this in an internal function
            orig_obj = None
            annotation_units = Annotation_Units.objects.all().filter(task_id=obj.id)

            # handle new refinement or extension layer tasks - get the parent annotation units - start
            if (len(annotation_units) == 0 and obj.parent_task is not None):  # TODO: check if coarsening task is ok with that
                # get the parent task annotation units
                orig_obj = obj
                obj = obj.parent_task
                annotation_units = Annotation_Units.objects.all().filter(task_id=obj.id)

            annotation_units = annotation_units.select_related('parent_id')
            # handle new refinement or extension layer tasks - get the parent annotation units - end

            annotation_units_json = []
            # NOTE(review): remote_annotation_unit_array is never used below.
            remote_annotation_unit_array = []
            for au in annotation_units:
                # set as default is_remote_copy = False
                au.is_remote_copy = False

                # check if i have a remote units
                remote_units = Annotation_Remote_Units_Annotation_Units.objects.filter(unit_id=au).select_related(
                    'remote_unit_id')
                for ru in remote_units:
                    # The remote unit instance is modified in memory only
                    # (no save() here) before being serialized.
                    # retrieve its original unit
                    remote_original_unit = ru.remote_unit_id  # Annotation_Units.objects.get(id = ru.remote_unit_id.id, task_id=obj.id)
                    # set the remote is_remote_copy = true
                    remote_original_unit.is_remote_copy = True
                    # set the parent_id to be the remote's one
                    remote_original_unit.parent_id = ru.unit_id
                    # setting the cloned_from tree_id
                    cloned_from_tree_id = remote_original_unit.tree_id
                    # set the tree_id to be that of the remote unit
                    remote_original_unit.tree_id = ru.remote_unit_tree_id
                    # add the remote original unit to the json output
                    annotation_units_json.append(Annotation_UnitsSerializer(remote_original_unit, context={
                        'cloned_from_tree_id': cloned_from_tree_id}).data)

                au_data = Annotation_UnitsSerializer(au).data

                # Only when the units were borrowed from the parent task
                # (orig_obj is set) and the task's own layer is not ROOT.
                if (orig_obj and orig_obj.project.layer.type != Constants.LAYER_TYPES_JSON['ROOT']):
                    # take Annotation_UnitsSerializer(au).data, and alter slot to be 3+
                    for index, cat in enumerate(au_data['categories']):
                        au_data['categories'][index]['slot'] = 3 + index
                annotation_units_json.append(au_data)

            # Sort by tree_id, comparing its '-'-separated parts as integers.

            annotation_units_json.sort(key=lambda x: tuple([int(a) for a in x['tree_id'].split('-')]))
            return annotation_units_json

        if obj.annotation_json:
            data = json.loads(obj.annotation_json.annotation_json)
        else:
            # Build the list once and persist it on the task for later reads.
            data = create_annotation(obj)
            data_json = json.dumps(data)
            aj = Annotation_Json.objects.create(task=obj, annotation_json=data_json)
            obj.annotation_json = aj
            obj.save()
        return data

        #return sorted(annotation_units_json, key=operator.itemgetter('is_remote_copy'), reverse=False)

        # **********************************
        #           AS ROOT OBJECT
        # **********************************
        # try:
        #     au = Annotation_Units.objects.get(task_id_id=obj.id, parent_id=None)
        # except Annotation_Units.DoesNotExist:
        #     au = None
        # return Annotation_UnitsSerializer(au).data

    def get_root_task(self,task_instance):
        """Return the id of the topmost ancestor of ``task_instance``.

        The parent chain is followed until a task without a parent is
        reached; a task without a parent is its own root.
        """
        root_task = task_instance
        # Identity test: None is a singleton and "!= None" would go through
        # the object's own comparison methods.
        while root_task.parent_task is not None:
            root_task = root_task.parent_task
        return root_task.id

    class Meta:
        # Model backing the serializer and the fields it exposes.
        model = Tasks
        fields = (
            'id',
            'parent',
            'children',
            'type',
            'project',
            'user',
            'passage',
            # 'tokens' and 'annotation_units' are computed by get_tokens and
            # get_annotation_units.
            'tokens',
            'annotation_units',
            'is_demo',
            'manager_comment',
            'user_comment',
            'is_active',
            'created_by',
            'created_at',
            'updated_at'
        )

    def update(self, instance, validated_data):
        """Apply the save action named by ``context['save_type']``.

        ``'draft'``, ``'submit'`` and ``'reset'`` are dispatched to
        ``save_draft``, ``submit`` and ``reset``; any other value leaves the
        task untouched. The instance is returned in every case.

        Raises:
            CantChangeSubmittedTaskExeption: when the task status is
                'SUBMITTED'.
        """
        if instance.status == 'SUBMITTED':
            raise CantChangeSubmittedTaskExeption

        # Handlers are looked up by name so only the selected one is resolved.
        handler_names = {
            'draft': 'save_draft',
            'submit': 'submit',
            'reset': 'reset',
        }
        handler_name = handler_names.get(self.context['save_type'])
        if handler_name is not None:
            getattr(self, handler_name)(instance)

        return instance

    def reset(self,instance):
        """Return the task to its NOT_STARTED state and save it.

        The cached annotation/tokens JSON references and the user comment are
        cleared, then the type-specific reset runs before saving.
        """
        # TODO: Clear instance.annotation_json
        instance.annotation_json = None
        instance.tokens_json = None
        instance.status = Constants.TASK_STATUS_JSON['NOT_STARTED']
        instance.user_comment = ''

        is_tokenization = instance.type == Constants.TASK_TYPES_JSON['TOKENIZATION']
        # NOTE(review): reset_current_task is not defined in the visible part
        # of this class (only reset_annotation_task is) - confirm it exists.
        type_specific_reset = (
            self.reset_tokenization_task if is_tokenization
            else self.reset_current_task
        )
        type_specific_reset(instance)
        instance.save()

    def save_draft(self,instance):
        """Mark the task ONGOING and store the submitted draft.

        Tokenization tasks store their tokens. Annotation and review tasks
        are validated, then the ``annotation_units`` and ``tokens`` payloads
        are stored as new Annotation_Json / Tokens_Json rows and the user
        comment is copied from the payload. The task is saved at the end.
        """
        instance.status = 'ONGOING'
        print('save_draft')

        task_type = instance.type
        if task_type == Constants.TASK_TYPES_JSON['TOKENIZATION']:
            self.save_tokenization_task(instance)
        elif task_type in [Constants.TASK_TYPES_JSON['ANNOTATION'],Constants.TASK_TYPES_JSON['REVIEW']]:
            self.validate_annotation_task(instance)
            payload = self.initial_data

            units_json = json.dumps(payload['annotation_units'])
            instance.annotation_json = Annotation_Json.objects.create(
                task=instance, annotation_json=units_json)

            tokens_json = json.dumps(payload['tokens'])
            instance.tokens_json = Tokens_Json.objects.create(
                task=instance, tokens_json=tokens_json)

            instance.user_comment = payload['user_comment']

        instance.save()

    def reset_tokenization_task(self, instance):
        """Delete every token stored for *instance*.

        The parent-task consistency check runs first, so nothing is deleted
        when ``check_if_parent_task_ok_or_exception`` raises.
        """
        self.check_if_parent_task_ok_or_exception(instance)
        stored_tokens = instance.tokens_set.all()
        stored_tokens.delete()

    def reset_annotation_task(self, instance):
        """Run the parent-task check, then delete the tokens of *instance*.

        NOTE(review): despite its name this removes the task's *tokens*, not
        its annotation units, exactly like ``reset_tokenization_task`` --
        confirm whether that is intended.
        """
        self.check_if_parent_task_ok_or_exception(instance)
        stored_tokens = instance.tokens_set.all()
        stored_tokens.delete()

    def save_tokens(self, instance):
        """Replace the tokens of *instance* with those in the request payload.

        All existing tokens of the task are deleted, then one ``Tokens`` row
        is created per entry of ``self.initial_data['tokens']``.

        Returns:
            A pair ``(tokens, id_map)``: the list of newly created token
            objects in payload order, and a dict mapping each payload token
            ``id`` to the id of the row created for it.
        """
        instance.tokens_set.all().delete()

        created_tokens = []
        id_map = {}
        for token_data in self.initial_data['tokens']:
            token = Tokens()
            token.task_id_id = instance
            token.text = token_data['text']
            # Whether a token needs annotation is derived from isPunct(text).
            token.require_annotation = not isPunct(token.text)
            token.start_index = token_data['start_index']
            token.end_index = token_data['end_index']
            # bulk=False makes the related manager save the token right away,
            # so its id can be read on the next line.
            instance.tokens_set.add(token, bulk=False)
            id_map[token_data['id']] = token.id
            created_tokens.append(token)

        return created_tokens, id_map


    def save_tokenization_task(self, instance):
        """Rewrite the tokens of a tokenization task from the request payload.

        After the parent-task check passes, the task's current tokens are
        deleted and one ``Tokens`` row is saved per entry of
        ``self.initial_data['tokens']``.
        """
        print('save_tokenization_task - start')
        self.check_if_parent_task_ok_or_exception(instance)

        instance.tokens_set.all().delete()
        for token_data in self.initial_data['tokens']:
            token = Tokens()
            token.task_id_id = instance
            token.text = token_data['text']
            # Whether a token needs annotation is derived from isPunct(text).
            token.require_annotation = not isPunct(token.text)
            token.start_index = token_data['start_index']
            token.end_index = token_data['end_index']
            # bulk=False makes the related manager save each token individually.
            instance.tokens_set.add(token, bulk=False)

        print('save_tokenization_task - end')


    # def save_annotation_task(self, instance):
    #     # TODO: Split into validate_annotation_task and save_annotation_task
    #     # validate_annotation_task only validates the initial_data without reading or writing to the database
    #     # self.initial_data is the JSON received from the frontend
    #     print('save_annotation_task - start')
    #     logger.info('save_annotation_task - start')
    #
    #     # mainly saving an annotations units array
    #     self.check_if_parent_task_ok_or_exception(instance)  # Validation
    #     self.reset_current_task(instance)  # DB
    #     remote_units_array = []
    #     instance.user_comment = self.initial_data['user_comment']  # DB
    #
    #     # validating tokens
    #     tokens = self.initial_data['tokens']
    #     if not strictly_increasing([x['start_index'] for x in tokens]):
    #         raise TokensInvalid("tokens should be ordered by their start_index")
    #     tokens_id_to_startindex = dict([(x['id'], x['start_index']) for x in tokens])
    #     children_tokens_list_for_validation = []
    #     for au in self.initial_data['annotation_units']:
    #         cur_children_tokens = au.get('children_tokens')
    #         try:
    #             if cur_children_tokens:
    #                 cur_children_tokens_start_indices = [tokens_id_to_startindex[x['id']] for x in cur_children_tokens]
    #             else:
    #                 if au['type'] == 'IMPLICIT' or au['tree_id'] == '0':
    #                     cur_children_tokens_start_indices = None
    #                 else:
    #                     raise TokensInvalid("Only implicit units may not have a children_tokens field")
    #         except KeyError:
    #             raise TokensInvalid("children_tokens contains a token which is not in the task's tokens list.")
    #         children_tokens_list_for_validation.append((au['tree_id'],(au['parent_tree_id'],au['is_remote_copy'],cur_children_tokens_start_indices)))
    #
    #
    #     print("children_tokens_list_for_validation: "+str(cur_children_tokens_start_indices))
    #     if not check_children_tokens(children_tokens_list_for_validation):
    #         raise TokensInvalid("Inconsistency in children_tokens detected.")
    #
    #     all_tree_ids = [] # a list of all tree_ids by their order in the input
    #
    #     annotation_unit_map = {}  # tree_id -> annotation_unit object
    #
    #     for au in self.initial_data['annotation_units']:
    #         annotation_unit = Annotation_Units()
    #         if is_correct_format_tree_id(au['tree_id']):
    #             annotation_unit.tree_id = au['tree_id']
    #             all_tree_ids.append(au['tree_id'])
    #             annotation_unit_map[annotation_unit.tree_id] = annotation_unit
    #         else:
    #             raise TreeIdInvalid("tree_id is in an incorrect format; fix unit " + str(annotation_unit.tree_id))
    #
    #         annotation_unit.task_id = instance
    #         if au['type'] in [x[0] for x in Constants.ANNOTATION_UNIT_TYPES]:
    #             annotation_unit.type = au['type']
    #         else:
    #             raise UnallowedValueError("An annotation unit is given an unallowed type: "+au['type'])
    #
    #         annotation_unit.comment = au['comment']
    #         annotation_unit.cluster = au['cluster']
    #
    #         annotation_unit.is_remote_copy = au['is_remote_copy']
    #
    #         parent_id = None
    #         if au['parent_tree_id']:
    #             if not is_correct_format_tree_id(au['parent_tree_id']):
    #                 raise TreeIdInvalid("parent_tree_id is in an incorrect format; fix unit "+str(annotation_unit.tree_id))
    #             if not is_correct_format_tree_id_child(au['parent_tree_id'],au['tree_id']):
    #                 raise TreeIdInvalid("parent_tree_id and tree_id do not match in format; fix unit " + str(annotation_unit.tree_id))
    #
    #             # parent_id = get_object_or_404(Annotation_Units, tree_id=au['parent_tree_id'],task_id=instance.id)
    #             parent_id = annotation_unit_map[au['parent_tree_id']]
    #         else:
    #            if annotation_unit.tree_id != '0':
    #                raise TreeIdInvalid("All annotation units but unit 0 must have a valid, non-null tree_id; fix unit "+str(annotation_unit.tree_id))
    #
    #         annotation_unit.parent_id = parent_id
    #         annotation_unit.gui_status = au['gui_status']
    #
    #         if annotation_unit.is_remote_copy:
    #
    #             annotation_unit.remote_categories = get_value_or_none('categories', au)
    #             if au['cloned_from_tree_id']:
    #                 if not is_correct_format_tree_id(au['cloned_from_tree_id']):
    #                     raise TreeIdInvalid("cloned_from_tree_id is in an incorrect format; fix unit " + str(
    #                         annotation_unit.tree_id))
    #                 annotation_unit.cloned_from_tree_id = au['cloned_from_tree_id']
    #             else:
    #                 raise TreeIdInvalid("cloned_from_tree_id should be defined for all remote units")
    #             remote_units_array.append(annotation_unit)
    #         else: # not a remote unit
    #             if au['cloned_from_tree_id']:
    #                 raise TreeIdInvalid("cloned_from_tree_id should not be defined for non-remote units")
    #             instance.annotation_units_set.add(annotation_unit,bulk=False)
    #             # The following two functions just save data and do not validate anything
    #             self.save_children_tokens(annotation_unit, get_value_or_none('children_tokens', au),tokens_id_to_startindex)
    #             self.save_annotation_categories(annotation_unit, get_value_or_none('categories', au))
    #
    #     if not is_tree_ids_uniq_and_consecutive(all_tree_ids):
    #         raise TreeIdInvalid("tree_ids within a unit should be unique and consecutive")
    #
    #     for annotation_unit in remote_units_array:
    #         # TODO: Check if these functions do any validation
    #         remote_unit = self.save_annotation_remote_unit(annotation_unit)
    #         self.save_remote_annotation_categories(remote_unit,annotation_unit.remote_categories)
    #
    #     print('save_annotation_task - end')
    #     logger.info('save_annotation_task - end')

    def save_annotation_task(self, instance):
        """Persist the annotation units of the request payload for *instance*.

        The task's existing annotation units are deleted, its tokens are
        re-created from the payload, and every entry of
        ``self.initial_data['annotation_units']`` is stored. Non-remote units
        are saved immediately together with their child tokens and
        categories; remote copies are collected and saved after the loop, once
        all regular units exist. The task itself is saved at the end.

        This method performs no validation of its own: malformed tree ids or
        unknown unit types are silently skipped rather than rejected. Within
        this class, ``submit`` calls ``validate_annotation_task`` before it.
        """
        # self.initial_data is the JSON received from the frontend
        print('save_annotation_task - start')
        logger.info('save_annotation_task - start')

        # Start from a clean slate: drop the annotation units currently stored
        # for the task before writing the new ones.
        self.reset_current_task(instance)
        remote_units_array = []
        instance.user_comment = self.initial_data['user_comment']

        # save the new tokens instead of the old ones; the returned map
        # translates payload token ids to the ids of the freshly created rows
        # (the returned token list itself is not used below)
        tokens, old_to_new_token_id_map= self.save_tokens(instance)

        all_tree_ids = []  # a list of all tree_ids by their order in the input
        annotation_unit_map = {}  # tree_id -> annotation_unit object

        for au in self.initial_data['annotation_units']:
            annotation_unit = Annotation_Units()
            if is_correct_format_tree_id(au['tree_id']):
                annotation_unit.tree_id = au['tree_id']
                all_tree_ids.append(au['tree_id'])
                annotation_unit_map[annotation_unit.tree_id] = annotation_unit

            annotation_unit.task_id = instance
            # The type is only assigned when it is one of the allowed values.
            if au['type'] in [x[0] for x in Constants.ANNOTATION_UNIT_TYPES]:
                annotation_unit.type = au['type']

            annotation_unit.comment = au['comment']
            annotation_unit.cluster = au['cluster']
            annotation_unit.is_remote_copy = au['is_remote_copy']

            # The parent is looked up among the units seen so far in this
            # payload, so a parent must appear before its children (a KeyError
            # is raised otherwise).
            # parent_id = get_object_or_404(Annotation_Units, tree_id=au['parent_tree_id'],task_id=instance.id)
            parent_id = annotation_unit_map[au['parent_tree_id']] if au['parent_tree_id'] else None

            annotation_unit.parent_id = parent_id
            annotation_unit.gui_status = au['gui_status']

            if annotation_unit.is_remote_copy:
                # Remote copies are not saved as units here; they are kept
                # aside (with their categories) and handled after the loop.
                annotation_unit.remote_categories = get_value_or_none('categories', au)
                if au['cloned_from_tree_id']:
                    annotation_unit.cloned_from_tree_id = au['cloned_from_tree_id']
                remote_units_array.append(annotation_unit)
            else:  # not a remote unit
                # bulk=False saves the unit right away so the rows created
                # below can reference it.
                instance.annotation_units_set.add(annotation_unit, bulk=False)
                self.save_children_tokens(annotation_unit, get_value_or_none('children_tokens', au), old_to_new_token_id_map)
                self.save_annotation_categories(annotation_unit, get_value_or_none('categories', au))

        # Remote units reference the unit they were cloned from by tree_id, so
        # they are written only after every regular unit has been saved.
        for annotation_unit in remote_units_array:
            remote_unit = self.save_annotation_remote_unit(annotation_unit)
            self.save_remote_annotation_categories(remote_unit, annotation_unit.remote_categories)

        instance.save()
        print('save_annotation_task - end')
        logger.info('save_annotation_task - end')

    def validate_annotation_task(self, instance):
        """Validate the annotation payload in ``self.initial_data`` for *instance*.

        Nothing is persisted here; the method only inspects the payload (and
        the task's parent link via ``check_if_parent_task_ok_or_exception``).

        Checks performed, in order:

        * the payload ``id`` equals ``instance.id``;
        * the parent-task rule for the task type holds;
        * tokens are ordered by strictly increasing ``start_index``;
        * every unit's ``children_tokens`` refer to tokens of the task, with
          no duplicates; only ``IMPLICIT`` units may have none;
        * ``parent_tree_id`` / ``tree_id`` are well formed and consistent;
        * ``check_children_tokens`` accepts the collected structure;
        * every unit has an allowed ``type``;
        * ``cloned_from_tree_id`` is set for remote copies and only for them;
        * tree ids are unique and consecutive.

        Raises:
            DiscrepancyBetweenTaskIdsException: payload id differs from the
                task id.
            TokensInvalid: a token-related check failed.
            TreeIdInvalid: a tree-id related check failed.
            UnallowedValueError: a unit has a type that is not allowed.
        """
        # self.initial_data is the JSON received from the frontend
        print('validate_annotation_task - start')
        logger.info('validate_annotation_task - start')

        if self.initial_data['id'] != instance.id:
            raise DiscrepancyBetweenTaskIdsException('Task id must me the same, of the instance and of the initial data')

        self.check_if_parent_task_ok_or_exception(instance)

        # validating tokens
        tokens = self.initial_data['tokens']
        if not is_increasing([x['start_index'] for x in tokens], strict=True):
            raise TokensInvalid("tokens should be ordered by their start_index")
        tokens_id_to_startindex = {x['id']: x['start_index'] for x in tokens}
        # An empty token list has no last element. In that case any unit that
        # references a token fails the lookup below (KeyError -> TokensInvalid)
        # before this value is ever compared.
        largest_index_in_task_tokens = tokens[-1]['start_index'] if tokens else None

        children_tokens_list_for_validation = []
        # Bound up front so the debug print after the loop cannot hit an
        # unbound local when the payload contains no annotation units.
        cur_children_tokens_start_indices = None
        for au in self.initial_data['annotation_units']:
            cur_children_tokens = au.get('children_tokens')
            try:
                if cur_children_tokens:
                    start_indices = \
                        [tokens_id_to_startindex[children_token['id']] for children_token in cur_children_tokens]
                    if any(start_index > largest_index_in_task_tokens for start_index in start_indices):
                        raise TokensInvalid("Invalid start index in unit %s, larger then the biggest token" % au['tree_id'])
                    if len(start_indices) > len(set(start_indices)):
                        raise TokensInvalid("Duplicate start index in children tokens in unit %s" % au['tree_id'])
                    cur_children_tokens_start_indices = start_indices
                else:
                    if au['type'] == 'IMPLICIT':
                        cur_children_tokens_start_indices = None
                    else:
                        raise TokensInvalid("Only implicit units may not have a children_tokens field. Annotation unit %s does not contain children_tokens" %au['tree_id'])
            except KeyError:
                raise TokensInvalid("children_tokens contains a token which is not in the task's tokens list.")
            children_tokens_list_for_validation.append(
                (au['tree_id'], (au['parent_tree_id'], au['is_remote_copy'], cur_children_tokens_start_indices)))

            if au['parent_tree_id']:
                if not is_correct_format_tree_id(au['parent_tree_id']):
                    raise TreeIdInvalid(
                        "parent_tree_id is in an incorrect format; fix unit " + str(au['tree_id']))
                if not is_correct_format_tree_id_child(au['parent_tree_id'], au['tree_id']):
                    raise TreeIdInvalid(
                        "parent_tree_id and tree_id do not match in format; fix unit " + str(au['tree_id']))
            else:
                if au['tree_id'] != '0':
                    # The missing value here is the parent reference, not the
                    # unit's own tree_id.
                    raise TreeIdInvalid(
                        "All annotation units but unit 0 must have a valid, non-null parent_tree_id; fix unit " + str(
                            au['tree_id']))

        # Debug output: start indices of the last processed unit.
        print("children_tokens_list_for_validation: " + str(cur_children_tokens_start_indices))
        if not check_children_tokens(children_tokens_list_for_validation):  # will never happens
            raise TokensInvalid("Inconsistency in children_tokens detected.")

        all_tree_ids = []  # a list of all tree_ids by their order in the input
        # Loop-invariant: computed once instead of once per unit.
        allowed_unit_types = [x[0] for x in Constants.ANNOTATION_UNIT_TYPES]

        for au in self.initial_data['annotation_units']:
            if is_correct_format_tree_id(au['tree_id']):
                all_tree_ids.append(au['tree_id'])
            else:
                raise TreeIdInvalid("tree_id is in an incorrect format; fix unit " + str(au['tree_id']))

            if au['type'] not in allowed_unit_types:
                # str() keeps the intended error even for a non-string type.
                raise UnallowedValueError("An annotation unit is given an unallowed type: " + str(au['type']))

            if au['is_remote_copy']:
                if au['cloned_from_tree_id']:
                    if not is_correct_format_tree_id(au['cloned_from_tree_id']):
                        raise TreeIdInvalid("cloned_from_tree_id is in an incorrect format; fix unit " + str(
                            au['tree_id']))
                else:
                    raise TreeIdInvalid("cloned_from_tree_id should be defined for all remote units")
            else:  # not a remote unit
                if au['cloned_from_tree_id']:
                    raise TreeIdInvalid("cloned_from_tree_id should not be defined for non-remote units")

            # NOTE: gui_status checks (HIDDEN on internal units, OPEN for
            # remote/implicit units) were previously disabled as buggy and
            # are intentionally not enforced here.

        if not is_tree_ids_uniq_and_consecutive(all_tree_ids):
            raise TreeIdInvalid("tree_ids within a unit should be unique and consecutive")

        print('validate_annotation_task - end')
        logger.info('validate_annotation_task - end')

    def save_remote_annotation_categories(self, remote_annotation_unit, categories):
        """Store the categories attached to a remote annotation unit.

        One ``Annotation_Units_Categories`` row is saved per category. Each
        row points at the unit the remote was cloned from
        (``remote_annotation_unit.remote_unit_id``) and records the remote's
        parent (``remote_annotation_unit.unit_id``) as ``remote_parent_id``.

        Args:
            remote_annotation_unit: the saved remote-unit link object.
            categories: iterable of dicts with an ``id`` and an optional
                ``slot`` (defaults to 1), or ``None`` when the payload
                carried no categories -- treated as empty.
        """
        print('save_remote_annotation_categories - start')
        # Callers obtain ``categories`` via get_value_or_none, so a unit
        # without the key must not crash the loop.
        for cat in categories or ():
            unit_category = Annotation_Units_Categories()
            unit_category.unit_id = remote_annotation_unit.remote_unit_id
            unit_category.category_id = Categories.objects.get(id=cat['id'])
            unit_category.remote_parent_id = remote_annotation_unit.unit_id
            # TODO (Omri): disallow the option not to specify a slot
            unit_category.slot = cat.get('slot', 1)
            unit_category.save()
        print('save_remote_annotation_categories - end')

    def reset_current_task(self, task_instance):
        """Delete all annotation units stored for *task_instance*."""
        # TODO: validate the new array of annotation units before deleting the current one
        print('reset_current_task - start')
        # NOTE(review): the unit-token, unit-category and remote-unit rows are
        # presumably removed together with the units (cascading delete) --
        # confirm against the model definitions.
        existing_units = task_instance.annotation_units_set.all()
        existing_units.delete()
        print('reset_current_task - end')

    def save_annotation_remote_unit(self, annotation_unit):
        """Create and save the link row describing a remote annotation unit.

        Args:
            annotation_unit: an unsaved unit flagged as a remote copy; its
                ``parent_id``, ``cloned_from_tree_id``, ``task_id`` and
                ``tree_id`` are read.

        Returns:
            The saved ``Annotation_Remote_Units_Annotation_Units`` object.

        A 404 is raised (via ``get_object_or_404``) when the task has no unit
        with the tree id the remote claims to be cloned from.
        """
        # The unit this remote was cloned from, looked up within the same task.
        cloned_from_unit = get_object_or_404(Annotation_Units,
                                             tree_id=annotation_unit.cloned_from_tree_id,
                                             task_id=annotation_unit.task_id)

        link = Annotation_Remote_Units_Annotation_Units()
        # unit_id is the parent under which the remote copy appears.
        link.unit_id = annotation_unit.parent_id
        # remote_unit_id is the original unit the copy was cloned from.
        link.remote_unit_id = cloned_from_unit
        # The remote copy's own position in the tree.
        link.remote_unit_tree_id = annotation_unit.tree_id
        link.save()
        return link

    def save_children_tokens(self, annotation_unit, tokens, old_to_new_token_id_map):
        """Link *annotation_unit* to its child tokens with a single bulk insert.

        Args:
            annotation_unit: the saved unit the tokens belong to.
            tokens: iterable of dicts carrying the payload token ``id``, or
                ``None`` when the unit has no ``children_tokens`` (nothing is
                written in that case).
            old_to_new_token_id_map: maps payload token ids to the ids of the
                token rows stored for the task.

        Raises:
            KeyError: a child token id is missing from the map.
        """
        if tokens is None:
            return
        unit_token_links = [
            Annotation_Units_Tokens(unit_id=annotation_unit,
                                    token_id_id=old_to_new_token_id_map[t['id']])
            for t in tokens
        ]
        Annotation_Units_Tokens.objects.bulk_create(unit_token_links)


    def save_annotation_categories(self, annotation_unit, categories):
        """Attach *categories* to a regular (non-remote) annotation unit.

        All rows are built in memory and written with one ``bulk_create``.

        Args:
            annotation_unit: the saved unit the categories belong to.
            categories: iterable of dicts with an ``id`` and an optional
                ``slot`` (defaults to 1), or ``None`` when the payload
                carried no categories -- treated as empty.
        """
        print('save_annotation_categories - start')
        unit_categories = []
        # Callers obtain ``categories`` via get_value_or_none, so a unit
        # without the key must not crash the loop.
        for cat in categories or ():
            unit_category = Annotation_Units_Categories()
            unit_category.unit_id = annotation_unit
            # Assign the raw foreign-key id to avoid one query per category.
            unit_category.category_id_id = cat['id']
            # TODO (Omri): disallow the option not to specify a slot
            unit_category.slot = cat.get('slot', 1)
            unit_category.remote_parent_id = None
            unit_categories.append(unit_category)
        if unit_categories:
            Annotation_Units_Categories.objects.bulk_create(unit_categories)
        print('save_annotation_categories - end')

    def save_review_task(self, instance):
        """Save a review task; currently identical to saving an annotation task."""
        # TODO: CHECK IF OK !!!!
        print('save_review_task - start')
        persist_units = self.save_annotation_task
        persist_units(instance)
        print('save_review_task - end')

    def submit(self, instance):
        """Persist the payload for *instance* and mark the task ``SUBMITTED``.

        Tokenization tasks store their tokens; annotation and review tasks
        are validated and then saved through their respective save method.
        Afterwards only the ``status`` column is written.
        """
        task_type = instance.type
        known_types = Constants.TASK_TYPES_JSON

        if task_type == known_types['TOKENIZATION']:
            self.save_tokenization_task(instance)
        else:
            if task_type == known_types['ANNOTATION']:
                persist = self.save_annotation_task
            elif task_type == known_types['REVIEW']:
                persist = self.save_review_task
            else:
                persist = None
            if persist is not None:
                # Validation always precedes any write for these task types.
                self.validate_annotation_task(instance)
                persist(instance)

        instance.status = 'SUBMITTED'
        instance.save(update_fields=['status'])

    def check_if_parent_task_ok_or_exception(self, instance):
        """Ensure the parent link of *instance* matches its task type.

        A tokenization task must not have a parent task; every other task
        type must have one.

        Raises:
            SaveTaskTypeDeniedException: the rule above is violated.
        """
        is_tokenization = instance.type == Constants.TASK_TYPES_JSON['TOKENIZATION']
        has_parent = instance.parent_task is not None
        if is_tokenization and has_parent:
            raise SaveTaskTypeDeniedException
        if not is_tokenization and not has_parent:
            raise SaveTaskTypeDeniedException
class TaskSerializerAnnotator(serializers.ModelSerializer):
    """Serializer exposing a task to its annotator and applying their edits.

    ``created_by``, ``passage`` and ``project`` are rendered through nested
    read-only serializers. The remaining declared fields are computed by the
    matching ``get_<field>`` methods of this class.
    """
    # Nested, read-only representations of related objects.
    created_by = DjangoUserSerializer_Simplify(many=False, read_only=True)
    passage = PassageSerializer(many=False, read_only=True, allow_null=True)
    project = ProjectSerializerForAnnotator(many=False,
                                            read_only=True,
                                            allow_null=False)
    # Computed fields; each is produced by the get_<name> method below.
    user = serializers.SerializerMethodField()
    parent = serializers.SerializerMethodField()
    children = serializers.SerializerMethodField()
    tokens = serializers.SerializerMethodField()
    annotation_units = serializers.SerializerMethodField()
    is_active = serializers.SerializerMethodField()

    def get_is_active(self, obj):
        """Return the task's ``is_active`` flag, refusing inactive tasks.

        Raises:
            GetForInactiveTaskException: the task is not active.
        """
        if obj.is_active:
            return obj.is_active
        raise GetForInactiveTaskException

    def get_user(self, obj):
        """Return the simplified serialization of the task's annotator."""
        annotator_serializer = DjangoUserSerializer_Simplify(obj.annotator)
        return annotator_serializer.data

    def get_parent(self, obj):
        """Return the serialized parent task, or ``None`` for a task without one."""
        if obj.parent_task is None:
            return None
        return TaskInChartSerializer(obj.parent_task).data

    def get_children(self, obj):
        """Return the serialized tasks whose parent is *obj*, as a list."""
        child_tasks = Tasks.objects.all().filter(parent_task_id=obj.id)
        return [TaskInChartSerializer(child).data for child in child_tasks]

    def get_tokens(self, obj):
        """Return the serialized tokens relevant to *obj*, ordered by start index.

        A tokenization task owns its tokens. Any other task uses the tokens
        of the root of its parent chain (found with ``get_root_task``). Each
        token is serialized with its position in the ordered list passed as
        ``index_in_task`` in the serializer context.
        """
        if obj.type == Constants.TASK_TYPES_JSON['TOKENIZATION']:
            owner_task_id = obj.id
        else:
            # Tokens live on the root tokenization task of the chain.
            owner_task_id = self.get_root_task(obj)

        ordered_tokens = Tokens.objects.all().filter(
            task_id=owner_task_id).order_by('start_index')

        return [
            TokensSerializer(token, context={'index_in_task': position}).data
            for position, token in enumerate(ordered_tokens)
        ]

    def get_annotation_units(self, obj):
        """Return the task's annotation units as a list sorted by tree id.

        When the task has no annotation units of its own but has a parent
        task, the parent's units are returned instead. In that fallback case,
        if the task's project layer is not of type ROOT, the ``slot`` of each
        category is renumbered to ``3 + index``.

        For every unit that has remote units attached, the original (cloned
        from) unit is serialized an extra time as a remote copy: flagged
        ``is_remote_copy``, re-parented under the unit holding the remote,
        given the remote's tree id, and with the original tree id passed to
        the serializer as ``cloned_from_tree_id``.

        The result is sorted by the tuple of integers obtained by splitting
        each ``tree_id`` on ``'-'``.
        """
        logger.info("get_annotation_units accessed")
        # **********************************
        #           AS ARRAY
        # **********************************
        orig_obj = None
        annotation_units = Annotation_Units.objects.all().filter(
            task_id=obj.id)

        # handle a new refinement or extension layer task - get the parent annotation units - start
        if (len(annotation_units) == 0 and obj.parent_task
                is not None):  # TODO: check if coarsening task is ok with that
            # get the parent task annotation units; from here on `obj` is the
            # parent and `orig_obj` keeps the task that was actually requested
            orig_obj = obj
            obj = obj.parent_task
            annotation_units = Annotation_Units.objects.all().filter(
                task_id=obj.id)
        # handle a new refinement or extension layer task - get the parent annotation units - end

        annotation_units_json = []
        # NOTE: remote_annotation_unit_array is never used below.
        remote_annotation_unit_array = []
        for au in annotation_units:
            # set as default is_remote_copy = False
            au.is_remote_copy = False

            # check whether this unit has remote units attached to it
            remote_units = Annotation_Remote_Units_Annotation_Units.objects.all(
            ).filter(unit_id=au)
            for ru in remote_units:
                # retrieve its original unit
                remote_original_unit = Annotation_Units.objects.get(
                    id=ru.remote_unit_id.id, task_id=obj.id)
                # The changes below are made on the in-memory object only, for
                # serialization; nothing is saved.
                # set the remote is_remote_copy = true
                remote_original_unit.is_remote_copy = True
                # set the parent_id to be the remote's one
                remote_original_unit.parent_id = ru.unit_id
                # remember the original tree_id before it is overwritten
                cloned_from_tree_id = remote_original_unit.tree_id
                # set the tree_id to be that of the remote unit
                remote_original_unit.tree_id = ru.remote_unit_tree_id
                # add the remote original unit to the json output
                annotation_units_json.append(
                    Annotation_UnitsSerializer(remote_original_unit,
                                               context={
                                                   'cloned_from_tree_id':
                                                   cloned_from_tree_id
                                               }).data)

            au_data = Annotation_UnitsSerializer(au).data

            # Only applies when the units were inherited from the parent task
            # (orig_obj is set) and the requested task's layer is not ROOT.
            if (orig_obj and orig_obj.project.layer.type !=
                    Constants.LAYER_TYPES_JSON['ROOT']):
                # take Annotation_UnitsSerializer(au).data, and alter slot to be 3+
                for index, cat in enumerate(au_data['categories']):
                    au_data['categories'][index]['slot'] = 3 + index
            annotation_units_json.append(au_data)

        # sort regular units and remote copies together by tree_id, comparing
        # the dash-separated components numerically

        annotation_units_json.sort(
            key=lambda x: tuple([int(a) for a in x['tree_id'].split('-')]))
        return annotation_units_json

        # Earlier alternatives, kept for reference (unreachable):
        #return sorted(annotation_units_json, key=operator.itemgetter('is_remote_copy'), reverse=False)

        # **********************************
        #           AS ROOT OBJECT
        # **********************************
        # try:
        #     au = Annotation_Units.objects.get(task_id_id=obj.id, parent_id=None)
        # except Annotation_Units.DoesNotExist:
        #     au = None
        # return Annotation_UnitsSerializer(au).data

    def get_root_task(self, task_instance):
        """Return the id of the root of *task_instance*'s parent chain.

        Follows ``parent_task`` links until a task without a parent is
        reached; a task with no parent is its own root.
        """
        root_task = task_instance
        while root_task.parent_task is not None:
            root_task = root_task.parent_task
        return root_task.id

    class Meta:
        # Serializer options: the model being serialized and the fields exposed.
        model = Tasks
        fields = (
            'id',
            'parent',
            'children',
            'type',
            'project',
            'user',
            'passage',
            'tokens',
            'annotation_units',
            'is_demo',
            'manager_comment',
            'user_comment',
            'is_active',
            'created_by',
            'created_at',
            'updated_at',
        )

    def update(self, instance, validated_data):
        """Apply the save action named by ``self.context['save_type']``.

        ``'draft'``, ``'submit'`` and ``'reset'`` dispatch to ``save_draft``,
        ``submit`` and ``reset`` respectively; any other value leaves the
        task untouched. *validated_data* is not used.

        Returns:
            The same *instance* that was passed in.

        Raises:
            CantChangeSubmittedTaskExeption: the task is already submitted.
        """
        if instance.status == 'SUBMITTED':
            raise CantChangeSubmittedTaskExeption

        save_type = self.context['save_type']
        actions = (
            ('draft', 'save_draft'),
            ('submit', 'submit'),
            ('reset', 'reset'),
        )
        for action_name, handler_name in actions:
            if save_type == action_name:
                getattr(self, handler_name)(instance)
                break

        return instance

    def reset(self, instance):
        """Return *instance* to the ``NOT_STARTED`` state and save it.

        The annotator's comment is blanked and the task's persisted content
        is removed: tokens for a tokenization task, annotation units for
        every other task type.
        """
        instance.status = Constants.TASK_STATUS_JSON['NOT_STARTED']
        instance.user_comment = ''

        is_tokenization = instance.type == Constants.TASK_TYPES_JSON['TOKENIZATION']
        clear_content = self.reset_tokenization_task if is_tokenization else self.reset_current_task
        clear_content(instance)

        instance.save()

    def save_draft(self, instance):
        """Mark the task ``ONGOING``, store its payload and save the task.

        The payload is written by the save method matching the task type
        (tokenization, annotation or review); an unrecognized type only gets
        the status change.
        """
        instance.status = 'ONGOING'
        print('save_draft')

        task_types = Constants.TASK_TYPES_JSON
        if instance.type == task_types['TOKENIZATION']:
            persist = self.save_tokenization_task
        elif instance.type == task_types['ANNOTATION']:
            persist = self.save_annotation_task
        elif instance.type == task_types['REVIEW']:
            persist = self.save_review_task
        else:
            persist = None

        if persist is not None:
            persist(instance)
        instance.save()

    def reset_tokenization_task(self, instance):
        """Delete every token stored for *instance*.

        The parent-task consistency check runs first, so nothing is deleted
        when ``check_if_parent_task_ok_or_exception`` raises.
        """
        self.check_if_parent_task_ok_or_exception(instance)
        stored_tokens = instance.tokens_set.all()
        stored_tokens.delete()

    def reset_annotation_task(self, instance):
        """Run the parent-task check, then delete the tokens of *instance*.

        NOTE(review): despite its name this removes the task's *tokens*, not
        its annotation units, exactly like ``reset_tokenization_task`` --
        confirm whether that is intended.
        """
        self.check_if_parent_task_ok_or_exception(instance)
        stored_tokens = instance.tokens_set.all()
        stored_tokens.delete()

    def save_tokenization_task(self, instance):
        """Rewrite the tokens of a tokenization task from the request payload.

        After the parent-task check passes, the task's current tokens are
        deleted and one ``Tokens`` row is saved per entry of
        ``self.initial_data['tokens']``. ``require_annotation`` is taken
        from the payload as-is.
        """
        print('save_tokenization_task - start')
        self.check_if_parent_task_ok_or_exception(instance)

        instance.tokens_set.all().delete()
        for token_data in self.initial_data['tokens']:
            token = Tokens()
            token.task_id_id = instance
            token.text = token_data['text']
            token.require_annotation = token_data['require_annotation']
            token.start_index = token_data['start_index']
            token.end_index = token_data['end_index']
            # bulk=False makes the related manager save each token individually.
            instance.tokens_set.add(token, bulk=False)

        print('save_tokenization_task - end')

    def save_annotation_task(self, instance):
        print('save_annotation_task - start')

        # mainly saving an annotations units array
        self.check_if_parent_task_ok_or_exception(instance)
        self.reset_current_task(instance)
        remote_units_array = []
        instance.user_comment = self.initial_data['user_comment']

        # validating tokens
        tokens = self.initial_data['tokens']
        if not strictly_increasing([x['start_index'] for x in tokens]):
            raise TokensInvalid(
                "tokens should be ordered by their start_index")
        tokens_id_to_startindex = dict([(x['id'], x['start_index'])
                                        for x in tokens])
        children_tokens_list_for_validation = []
        for au in self.initial_data['annotation_units']:
            cur_children_tokens = au.get('children_tokens')
            try:
                if cur_children_tokens:
                    cur_children_tokens_start_indices = [
                        tokens_id_to_startindex[x['id']]
                        for x in cur_children_tokens
                    ]
                else:
                    if au['type'] == 'IMPLICIT' or au['tree_id'] == '0':
                        cur_children_tokens_start_indices = None
                    else:
                        raise TokensInvalid(
                            "Only implicit units may not have a children_tokens field"
                        )
            except KeyError:
                raise TokensInvalid(
                    "children_tokens contains a token which is not in the task's tokens list."
                )
            children_tokens_list_for_validation.append(
                (au['tree_id'], (au['parent_tree_id'], au['is_remote_copy'],
                                 cur_children_tokens_start_indices)))

        print("children_tokens_list_for_validation: " +
              str(cur_children_tokens_start_indices))
        if not check_children_tokens(children_tokens_list_for_validation):
            raise TokensInvalid("Inconsistency in children_tokens detected.")

        all_tree_ids = []  # a list of all tree_ids by their order in the input

        for au in self.initial_data['annotation_units']:
            annotation_unit = Annotation_Units()
            if is_correct_format_tree_id(au['tree_id']):
                annotation_unit.tree_id = au['tree_id']
                all_tree_ids.append(au['tree_id'])
            else:
                raise TreeIdInvalid(
                    "tree_id is in an incorrect format; fix unit " +
                    str(annotation_unit.tree_id))

            annotation_unit.task_id = instance
            if au['type'] in [x[0] for x in Constants.ANNOTATION_UNIT_TYPES]:
                annotation_unit.type = au['type']
            else:
                raise UnallowedValueError(
                    "An annotation unit is given an unallowed type: " +
                    au['type'])

            annotation_unit.comment = au['comment']
            annotation_unit.cluster = au['cluster']

            annotation_unit.is_remote_copy = au['is_remote_copy']

            parent_id = None
            if au['parent_tree_id']:
                if not is_correct_format_tree_id(au['parent_tree_id']):
                    raise TreeIdInvalid(
                        "parent_tree_id is in an incorrect format; fix unit " +
                        str(annotation_unit.tree_id))
                if not is_correct_format_tree_id_child(au['parent_tree_id'],
                                                       au['tree_id']):
                    raise TreeIdInvalid(
                        "parent_tree_id and tree_id do not match in format; fix unit "
                        + str(annotation_unit.tree_id))

                parent_id = get_object_or_404(Annotation_Units,
                                              tree_id=au['parent_tree_id'],
                                              task_id=instance.id)
            else:
                if annotation_unit.tree_id != '0':
                    raise TreeIdInvalid(
                        "All annotation units but unit 0 must have a valid, non-null tree_id; fix unit "
                        + str(annotation_unit.tree_id))

            annotation_unit.parent_id = parent_id
            annotation_unit.gui_status = au['gui_status']

            if annotation_unit.is_remote_copy:

                annotation_unit.remote_categories = get_value_or_none(
                    'categories', au)
                if au['cloned_from_tree_id']:
                    if not is_correct_format_tree_id(
                            au['cloned_from_tree_id']):
                        raise TreeIdInvalid(
                            "cloned_from_tree_id is in an incorrect format; fix unit "
                            + str(annotation_unit.tree_id))
                    annotation_unit.cloned_from_tree_id = au[
                        'cloned_from_tree_id']
                else:
                    raise TreeIdInvalid(
                        "cloned_from_tree_id should be defined for all remote units"
                    )
                remote_units_array.append(annotation_unit)
            else:  # not a remote unit
                if au['cloned_from_tree_id']:
                    raise TreeIdInvalid(
                        "cloned_from_tree_id should not be defined for non-remote units"
                    )
                instance.annotation_units_set.add(annotation_unit, bulk=False)
                self.save_children_tokens(
                    annotation_unit, get_value_or_none('children_tokens', au),
                    tokens_id_to_startindex)
                self.save_annotation_categories(
                    annotation_unit, get_value_or_none('categories', au))

        if not is_tree_ids_uniq_and_consecutive(all_tree_ids):
            raise TreeIdInvalid(
                "tree_ids within a unit should be unique and consecutive")

        for annotation_unit in remote_units_array:
            remote_unit = self.save_annotation_remote_unit(annotation_unit)
            self.save_remote_annotation_categories(
                remote_unit, annotation_unit.remote_categories)

        print('save_annotation_task - end')

    def save_remote_annotation_categories(self, remote_annotation_unit,
                                          categories):
        """Save the categories attached to a remote copy of a unit.

        One ``Annotation_Units_Categories`` row is saved per entry of
        ``categories``. Each row points at
        ``remote_annotation_unit.remote_unit_id`` as its unit and stores
        ``remote_annotation_unit.unit_id`` as its ``remote_parent_id``.

        Args:
            remote_annotation_unit: object whose ``remote_unit_id`` and
                ``unit_id`` attributes are read for every saved row.
            categories: iterable of dicts with an ``'id'`` key and an
                optional ``'slot'`` key (slot defaults to 1). ``None`` or an
                empty iterable saves nothing.

        Raises:
            Whatever ``Categories.objects.get`` raises for an unknown id.
        """
        print('save_remote_annotation_categories - start')
        # Callers forward get_value_or_none('categories', ...), which
        # presumably yields None when the key is missing; treat that as
        # "no categories" instead of failing on iteration.
        for cat in categories or ():
            unit_category = Annotation_Units_Categories()
            unit_category.unit_id = remote_annotation_unit.remote_unit_id
            unit_category.category_id = Categories.objects.get(id=cat['id'])
            unit_category.remote_parent_id = remote_annotation_unit.unit_id

            # Omri added Sep 12:
            # Omri TODO: disallow the option not to specify a slot
            unit_category.slot = cat.get('slot', 1)

            unit_category.save()
        print('save_remote_annotation_categories - end')

    def reset_current_task(self, task_instance):
        """Delete all annotation units currently attached to the task.

        Args:
            task_instance: object exposing an ``annotation_units_set``
                related manager; every unit it returns is deleted.
        """
        # TODO: validate the new array of annotation units before deleting the current one
        print('reset_current_task - start')
        # Only the annotation units are deleted explicitly here. Their
        # tokens, categories and remote-unit links are presumably removed by
        # cascading deletes -- TODO confirm against the model definitions.
        existing_units = task_instance.annotation_units_set.all()
        existing_units.delete()
        print('reset_current_task - end')

    def save_annotation_remote_unit(self, annotation_unit):
        """Create and save the link row that represents a remote unit.

        Args:
            annotation_unit: the (unsaved) remote copy; its ``parent_id``,
                ``cloned_from_tree_id``, ``task_id`` and ``tree_id``
                attributes are read.

        Returns:
            The saved ``Annotation_Remote_Units_Annotation_Units`` instance.

        Raises:
            Whatever ``get_object_or_404`` raises when no unit with the
            cloned-from tree id exists in the same task.
        """
        link = Annotation_Remote_Units_Annotation_Units()

        # unit_id holds the parent under which the remote copy appears.
        link.unit_id = annotation_unit.parent_id

        # remote_unit_id holds the unit the copy was cloned from, looked up
        # by tree id within the same task.
        link.remote_unit_id = get_object_or_404(
            Annotation_Units,
            tree_id=annotation_unit.cloned_from_tree_id,
            task_id=annotation_unit.task_id)

        # Keep the tree id the remote copy itself was given.
        link.remote_unit_tree_id = annotation_unit.tree_id

        link.save()
        return link

    def save_children_tokens(self, annotation_unit, tokens, id_to_start_index):
        """Link an annotation unit to each of its child tokens.

        One ``Annotation_Units_Tokens`` row is saved per entry of ``tokens``.

        Args:
            annotation_unit: the unit the tokens belong to.
            tokens: iterable of dicts with an ``'id'`` key, or ``None`` when
                the unit has no children tokens (nothing is saved).
            id_to_start_index: accepted for interface compatibility; not
                used by this method.

        Raises:
            Whatever ``Tokens.objects.get`` raises for an unknown token id.
        """
        if tokens is None:
            return

        print('save_children_tokens - start')
        for token in tokens:
            unit_token = Annotation_Units_Tokens()
            unit_token.unit_id = annotation_unit
            unit_token.token_id = Tokens.objects.get(id=token['id'])
            unit_token.save()
        print('save_children_tokens - end')

    def save_annotation_categories(self, annotation_unit, categories):
        """Save the categories of a regular (non-remote) annotation unit.

        One ``Annotation_Units_Categories`` row is saved per entry of
        ``categories``, with ``remote_parent_id`` left as ``None``.

        Args:
            annotation_unit: the unit the categories are attached to.
            categories: iterable of dicts with an ``'id'`` key and an
                optional ``'slot'`` key (slot defaults to 1). ``None`` or an
                empty iterable saves nothing.

        Raises:
            Whatever ``Categories.objects.get`` raises for an unknown id.
        """
        print('save_annotation_categories - start')
        # Callers forward get_value_or_none('categories', ...), which
        # presumably yields None when the key is missing; treat that as
        # "no categories" instead of failing on iteration.
        for cat in categories or ():
            unit_category = Annotation_Units_Categories()
            unit_category.unit_id = annotation_unit
            unit_category.category_id = Categories.objects.get(id=cat['id'])
            # Omri TODO: disallow the option not to specify a slot
            unit_category.slot = cat.get('slot', 1)
            unit_category.remote_parent_id = None
            unit_category.save()
        print('save_annotation_categories - end')

    def save_review_task(self, instance):
        """Save a review task by delegating to ``save_annotation_task``.

        Args:
            instance: the task being saved; passed through unchanged.
        """
        # TODO: confirm that review tasks may reuse the annotation save path
        # unchanged.
        print('save_review_task - start')
        self.save_annotation_task(instance)
        print('save_review_task - end')

    def submit(self, instance):
        """Mark the task as submitted, saving its tokens first if needed.

        For a TOKENIZATION task the tokens are saved through
        ``save_tokenization_task`` before the status changes. Only the
        ``status`` field is written back by the final save.

        Args:
            instance: the task to submit.
        """
        task_type = instance.type
        if task_type == Constants.TASK_TYPES_JSON['TOKENIZATION']:
            self.save_tokenization_task(instance)

        instance.status = 'SUBMITTED'
        # Restrict the write to the one column changed here.
        instance.save(update_fields=['status'])

    def check_if_parent_task_ok_or_exception(self, instance):
        """Check that the task's parent link is consistent with its type.

        A TOKENIZATION task must not have a parent task; a task of any other
        type must have one.

        Args:
            instance: the task to check; its ``type`` and ``parent_task``
                attributes are read.

        Raises:
            SaveTaskTypeDeniedException: if the rule above is violated.
        """
        is_tokenization = (
            instance.type == Constants.TASK_TYPES_JSON['TOKENIZATION'])
        has_parent = instance.parent_task is not None

        if is_tokenization and has_parent:
            raise SaveTaskTypeDeniedException
        if not is_tokenization and not has_parent:
            raise SaveTaskTypeDeniedException
Exemple #9
0
class TaskSerializerAnnotator(serializers.ModelSerializer):
    """Serializer for ``Tasks`` that also saves the task's payload.

    On read it exposes the task together with its parent, children, tokens
    and annotation units. On ``update`` it ignores ``validated_data`` and
    reads ``self.initial_data`` instead: ``save_type`` selects between saving
    a draft and submitting, and the ``tokens`` / ``annotation_units`` arrays
    carry the content to save.
    """
    created_by = DjangoUserSerializer_Simplify(many=False, read_only=True)
    passage = PassageSerializer(many=False, read_only=True, allow_null=True)
    project = ProjectSerializerForAnnotator(many=False,
                                            read_only=True,
                                            allow_null=False)
    user = serializers.SerializerMethodField()
    parent = serializers.SerializerMethodField()
    children = serializers.SerializerMethodField()
    tokens = serializers.SerializerMethodField()
    annotation_units = serializers.SerializerMethodField()

    def get_user(self, obj):
        """Return the serialized ``obj.annotator``."""
        return DjangoUserSerializer_Simplify(obj.annotator).data

    def get_parent(self, obj):
        """Return the serialized parent task, or ``None`` if there is none."""
        if obj.parent_task is None:
            return None
        return TaskInChartSerializer(obj.parent_task).data

    def get_children(self, obj):
        """Return the serialized tasks whose ``parent_task_id`` is ``obj.id``."""
        children_tasks = Tasks.objects.filter(parent_task_id=obj.id)
        return [TaskInChartSerializer(child).data for child in children_tasks]

    def get_tokens(self, obj):
        """Return the task's serialized tokens ordered by ``start_index``.

        A TOKENIZATION task uses its own tokens; any other task uses the
        tokens of the root task reached by following ``parent_task`` links.
        """
        if obj.type == Constants.TASK_TYPES_JSON['TOKENIZATION']:
            source_task_id = obj.id
        else:
            # get the tokens array from the root tokenization task
            source_task_id = self.get_root_task(obj)

        tokens = Tokens.objects.filter(task_id=source_task_id)
        tokens_json = [TokensSerializer(token).data for token in tokens]
        tokens_json.sort(key=operator.itemgetter("start_index"))
        return tokens_json

    def get_annotation_units(self, obj):
        """Return the task's annotation units as a flat list of dicts.

        If the task has no units of its own and has a parent task, the
        parent's units are returned instead. For every remote link found
        under a unit, the linked unit is serialized again with
        ``is_remote_copy`` set to ``True`` and ``parent_id`` set to the
        hosting unit. Entries with ``is_remote_copy`` false come first; the
        sort is stable, so relative order is otherwise preserved.
        """
        annotation_units = Annotation_Units.objects.filter(
            task_id=obj.id).order_by('id')

        # A new refinement or extension layer task has no units yet: fall
        # back to the parent task's annotation units.
        # TODO: check if coarsening task is ok with that
        if not annotation_units and obj.parent_task is not None:
            obj = obj.parent_task
            annotation_units = Annotation_Units.objects.filter(
                task_id=obj.id).order_by('id')

        annotation_units_json = []
        for au in annotation_units:
            # Units read straight from the table are never remote copies.
            au.is_remote_copy = False

            remote_units = (
                Annotation_Remote_Units_Annotation_Units.objects.filter(
                    unit_id=au))
            for ru in remote_units:
                # Fetch the original unit as a separate instance; the
                # overrides below are in-memory only and are not saved here.
                remote_original_unit = Annotation_Units.objects.get(
                    id=ru.remote_unit_id.id, task_id=obj.id)
                remote_original_unit.is_remote_copy = True
                # Present the copy under the unit that hosts it.
                remote_original_unit.parent_id = ru.unit_id
                annotation_units_json.append(
                    Annotation_UnitsSerializer(remote_original_unit).data)

            annotation_units_json.append(Annotation_UnitsSerializer(au).data)

        # return all array sorted with all the remote units in the end
        return sorted(annotation_units_json,
                      key=operator.itemgetter('is_remote_copy'))

    def get_root_task(self, task_instance):
        """Follow ``parent_task`` links upward and return the root task's id."""
        root_task = task_instance
        while root_task.parent_task is not None:
            root_task = root_task.parent_task
        return root_task.id

    class Meta:
        model = Tasks
        fields = ('id', 'parent', 'children', 'type', 'status', 'project',
                  'user', 'passage', 'tokens', 'annotation_units', 'is_demo',
                  'manager_comment', 'is_active', 'created_by', 'created_at',
                  'updated_at')

    def update(self, instance, validated_data):
        """Save a draft of the task or submit it, per ``save_type``.

        ``validated_data`` is not used; ``self.initial_data['save_type']``
        decides what happens. Any value other than ``'draft'`` or
        ``'submit'`` leaves the instance untouched.

        Returns:
            The same ``instance`` that was passed in.

        Raises:
            CantChangeSubmittedTaskExeption: if the task is already
                SUBMITTED.
        """
        # disable saving a SUBMITTED task
        if instance.status == 'SUBMITTED':
            raise CantChangeSubmittedTaskExeption

        save_type = self.initial_data['save_type']
        if save_type == 'draft':
            self.save_draft(instance)
        elif save_type == 'submit':
            self.submit(instance)

        return instance

    def save_draft(self, instance):
        """Mark the task ONGOING and save its payload according to its type."""
        instance.status = 'ONGOING'
        print('save_draft')
        if instance.type == Constants.TASK_TYPES_JSON['TOKENIZATION']:
            self.save_tokenization_task(instance)
        elif instance.type == Constants.TASK_TYPES_JSON['ANNOTATION']:
            self.save_annotation_task(instance)
        elif instance.type == Constants.TASK_TYPES_JSON['REVIEW']:
            self.save_review_task(instance)
        instance.save()

    def save_tokenization_task(self, instance):
        """Replace the task's tokens with ``self.initial_data['tokens']``.

        Existing tokens are deleted first; each incoming entry must provide
        ``text``, ``start_index`` and ``end_index``.

        Raises:
            SaveTaskTypeDeniedException: if the parent-task check fails.
        """
        print('save_tokenization_task - start')
        self.check_if_parent_task_ok_or_exception(instance)
        instance.tokens_set.all().delete()
        for token in self.initial_data['tokens']:
            new_token = Tokens()
            # NOTE(review): this assigns a model instance to `task_id_id`,
            # which looks like the raw column attribute of a `task_id`
            # foreign key. The relation is presumably set correctly by
            # tokens_set.add() below -- confirm against the Tokens model.
            new_token.task_id_id = instance
            new_token.text = token['text']
            new_token.require_annotation = (not isPunct(new_token.text))
            new_token.start_index = token['start_index']
            new_token.end_index = token['end_index']
            instance.tokens_set.add(new_token, bulk=False)
        print('save_tokenization_task - end')

    def save_annotation_task(self, instance):
        """Replace the task's annotation units with the submitted array.

        The current units are deleted, then every entry of
        ``self.initial_data['annotation_units']`` is processed in order.
        Regular units are saved immediately with their children tokens and
        categories. Remote copies are collected and saved after the loop,
        once all regular units exist.

        Raises:
            SaveTaskTypeDeniedException: if the parent-task check fails.
        """
        print('save_annotation_task - start')
        # mainly saving an annotations units array
        self.check_if_parent_task_ok_or_exception(instance)
        self.reset_current_task(instance)
        remote_units_array = []
        for au in self.initial_data['annotation_units']:
            annotation_unit = Annotation_Units()
            annotation_unit.annotation_unit_tree_id = au[
                'annotation_unit_tree_id']
            annotation_unit.task_id = instance
            annotation_unit.type = au['type']
            annotation_unit.comment = au['comment']
            annotation_unit.is_remote_copy = au['is_remote_copy']

            # The incoming 'parent_id' is a tree id; resolve it to the unit
            # already saved for this task.
            parent_unit = None
            if au['parent_id']:
                parent_unit = get_object_or_404(
                    Annotation_Units,
                    annotation_unit_tree_id=au['parent_id'],
                    task_id=instance.id)

            annotation_unit.parent_id = parent_unit
            annotation_unit.gui_status = au['gui_status']

            if annotation_unit.is_remote_copy:
                # Remote copies are not saved as units; keep their
                # categories on the in-memory object for the second pass.
                annotation_unit.remote_categories = get_value_or_none(
                    'categories', au)
                remote_units_array.append(annotation_unit)
            else:
                instance.annotation_units_set.add(annotation_unit, bulk=False)
                self.save_children_tokens(
                    annotation_unit, get_value_or_none('children_tokens', au))
                self.save_annotation_categories(
                    annotation_unit, get_value_or_none('categories', au))

        for annotation_unit in remote_units_array:
            remote_unit = self.save_annotation_remote_unit(annotation_unit)
            self.save_remote_annotation_categories(
                remote_unit, annotation_unit.remote_categories)

        print('save_annotation_task - end')

    def save_remote_annotation_categories(self, remote_annotation_unit,
                                          categories):
        """Save the categories attached to a remote copy of a unit.

        Each saved row points at ``remote_annotation_unit.remote_unit_id``
        and stores ``remote_annotation_unit.unit_id`` as its
        ``remote_parent_id``. ``None`` or empty ``categories`` saves nothing.
        """
        print('save_remote_annotation_categories - start')
        # get_value_or_none presumably yields None for a missing key; treat
        # that as "no categories" instead of failing on iteration.
        for cat in categories or ():
            unit_category = Annotation_Units_Categories()
            unit_category.unit_id = remote_annotation_unit.remote_unit_id
            unit_category.category_id = Categories.objects.get(id=cat['id'])
            unit_category.remote_parent_id = remote_annotation_unit.unit_id
            unit_category.save()
        print('save_remote_annotation_categories - end')

    def reset_current_task(self, task_instance):
        """Delete all annotation units currently attached to the task."""
        # TODO: validate the new array of annotation units before deleting the current one
        print('reset_current_task - start')
        # Only the units are deleted explicitly. Their tokens, categories
        # and remote-unit links are presumably removed by cascading deletes
        # -- TODO confirm against the model definitions.
        task_instance.annotation_units_set.all().delete()
        print('reset_current_task - end')

    def save_annotation_remote_unit(self, annotation_unit):
        """Create, save and return the link row for a remote unit.

        The link's ``unit_id`` is the copy's parent. Its ``remote_unit_id``
        is the unit in the same task that has the copy's
        ``annotation_unit_tree_id``.
        """
        remote_unit = Annotation_Remote_Units_Annotation_Units()
        # remote_unit.unit_id means that it is the parent
        remote_unit.unit_id = annotation_unit.parent_id
        # remote_unit.remote_unit_id means that it is the remote unit
        remote_unit.remote_unit_id = get_object_or_404(
            Annotation_Units,
            annotation_unit_tree_id=annotation_unit.annotation_unit_tree_id,
            task_id=annotation_unit.task_id)
        remote_unit.save()
        return remote_unit

    def save_children_tokens(self, annotation_unit, tokens):
        """Link the unit to each token in ``tokens``; ``None`` saves nothing."""
        if tokens is None:
            return

        print('save_children_tokens - start')
        for token in tokens:
            annotation_units_token = Annotation_Units_Tokens()
            annotation_units_token.unit_id = annotation_unit
            annotation_units_token.token_id = Tokens.objects.get(
                id=token['id'])
            annotation_units_token.save()
        print('save_children_tokens - end')

    def save_annotation_categories(self, annotation_unit, categories):
        """Save the categories of a regular (non-remote) annotation unit.

        Each saved row has ``remote_parent_id`` set to ``None``. ``None`` or
        empty ``categories`` saves nothing.
        """
        print('save_annotation_categories - start')
        # get_value_or_none presumably yields None for a missing key; treat
        # that as "no categories" instead of failing on iteration.
        for cat in categories or ():
            unit_category = Annotation_Units_Categories()
            unit_category.unit_id = annotation_unit
            unit_category.category_id = Categories.objects.get(id=cat['id'])
            unit_category.remote_parent_id = None
            unit_category.save()
        print('save_annotation_categories - end')

    def save_review_task(self, instance):
        """Save a review task by delegating to ``save_annotation_task``."""
        # TODO: confirm that review tasks may reuse the annotation save path
        # unchanged.
        print('save_review_task - start')
        self.save_annotation_task(instance)
        print('save_review_task - end')

    def submit(self, instance):
        """Mark the task as SUBMITTED and save it."""
        instance.status = 'SUBMITTED'
        print('submit')
        instance.save()

    def check_if_parent_task_ok_or_exception(self, instance):
        """Check that the task's parent link is consistent with its type.

        A TOKENIZATION task must not have a parent task; a task of any other
        type must have one.

        Raises:
            SaveTaskTypeDeniedException: if the rule above is violated.
        """
        is_tokenization = (
            instance.type == Constants.TASK_TYPES_JSON['TOKENIZATION'])
        has_parent = instance.parent_task is not None

        if is_tokenization and has_parent:
            raise SaveTaskTypeDeniedException
        if not is_tokenization and not has_parent:
            raise SaveTaskTypeDeniedException