Example #1
    def update(self, instance, validated_data):
        instance.concept_class = validated_data.get('concept_class', instance.concept_class)
        instance.datatype = validated_data.get('datatype', instance.datatype)
        instance.extras = validated_data.get('extras', instance.extras)
        instance.external_id = validated_data.get('external_id', instance.external_id)
        instance.comment = validated_data.get('update_comment') or validated_data.get('comment')
        instance.retired = validated_data.get('retired', instance.retired)

        new_names = [
            LocalizedText(
                **{k: v for k, v in name.items() if k not in ['name_type']}
            ) for name in validated_data.get('names', [])
        ]
        new_descriptions = [
            LocalizedText(
                **{k: v for k, v in desc.items() if k not in ['description_type']}
            ) for desc in validated_data.get('descriptions', [])
        ]

        instance.cloned_names = compact(new_names)
        instance.cloned_descriptions = compact(new_descriptions)
        errors = Concept.persist_clone(instance, self.context.get('request').user)
        if errors:
            self._errors.update(errors)
        return instance
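
For reference, compact in these snippets strips falsey entries from a list. A minimal sketch of that behavior, assuming the pydash implementation (or an equivalent local helper):

    from pydash import compact

    # compact() keeps only truthy items, dropping None, '', 0, False, [], {}
    assert compact([None, 'en', '', 'fr', 0]) == ['en', 'fr']

In Example #1 this guards instance.cloned_names and instance.cloned_descriptions against locale entries that came back empty or None.
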
Example #2
    def make_parts(self):
        prev_line = None
        orgs = self.resource_distribution.get('Organization', None)
        sources = self.resource_distribution.get('Source', None)
        collections = self.resource_distribution.get('Collection', None)
        self.parts = []  # start from an empty list so the appends below are always safe
        if orgs:
            self.parts.append(orgs)
        if sources:
            self.parts.append(sources)
        if collections:
            self.parts.append(collections)

        self.parts = compact(self.parts)

        self.parts.append([])

        for data in self.input_list:
            line = json.loads(data)
            data_type = (line.get('type') or '').lower()  # tolerate a missing 'type'
            if data_type not in ['organization', 'source', 'collection']:
                if prev_line:
                    prev_type = prev_line.get('type').lower()
                    if prev_type == data_type or (
                            data_type not in ['concept', 'mapping']
                            and prev_type not in ['concept', 'mapping']):
                        self.parts[-1].append(line)
                    else:
                        self.parts.append([line])
                else:
                    self.parts[-1].append(line)
                prev_line = line

        self.parts = compact(self.parts)
Example #3
    def predict(self, text, original_text, pos):
        doc = self.model(text)

        def default_entity_mapping(entity):
            return {
                "tag": entity.label_,
                "entity": " ".join(original_text.split()[entity.start:entity.end]),
                "start": entity.start,
                "end": entity.end
            }

        default_entities = compact(list(map(default_entity_mapping, doc.ents)))
        pos_mapping = []
        if pos is not None:

            def default_pos_mapping(word):
                original_text_tokens = original_text.split()
                text_tokens = text.split()
                word_text = word.text
                word_index = text_tokens.index(word_text)
                return {
                    "text": original_text_tokens[word_index]
                    if word_index < len(original_text_tokens) else word_text,
                    "lemma": word.lemma_,
                    "tag": word.tag_,
                    "pos": word.pos_
                }

            pos_mapping = list(map(default_pos_mapping, doc))
        return default_entities, pos_mapping
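
One caveat worth noting in this predict(): entity.start and entity.end are spaCy token indices, while the join slices a whitespace split of original_text, so the two only line up when spaCy's tokenization matches a plain split. A quick illustration of the end-exclusive token slicing itself:

    tokens = 'Barack Obama visited Paris'.split()
    start, end = 0, 2  # token span, end-exclusive, as spaCy reports it
    assert ' '.join(tokens[start:end]) == 'Barack Obama'
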
Example #4
 def create(self, request, **kwargs):  # pylint: disable=unused-argument
     if not self.parent_resource:
         return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED)
     permission = HasOwnership()
     if not permission.has_object_permission(request, self,
                                             self.parent_resource):
         return Response(status=status.HTTP_403_FORBIDDEN)
     serializer = self.get_serializer(
         data={
             'mnemonic': request.data.get('id'),
             'supported_locales': compact(request.data.pop('supported_locales', '').split(',')),
             'version': HEAD,
             **request.data,
             **{self.parent_resource.resource_type.lower(): self.parent_resource.id}
         })
     if serializer.is_valid():
         instance = serializer.save(force_insert=True)
         if serializer.is_valid():
             headers = self.get_success_headers(serializer.data)
             serializer = self.get_detail_serializer(instance)
             return Response(serializer.data,
                             status=status.HTTP_201_CREATED,
                             headers=headers)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Example #5
    def saved_unsaved_names(self):
        unsaved_names = get(self, 'cloned_names', [])

        if self.id:
            return compact([*list(self.names.all()), *unsaved_names])

        return unsaved_names
Example #6
    def persist_clone(cls, obj, user=None, create_parent_version=True, parent_concept_uris=None, **kwargs):  # pylint: disable=too-many-statements
        errors = dict()
        if not user:
            errors['version_created_by'] = PERSIST_CLONE_SPECIFY_USER_ERROR
            return errors
        obj.created_by = user
        obj.updated_by = user
        obj.version = obj.version or generate_temp_version()
        parent = obj.parent
        parent_head = parent.head
        persisted = False
        versioned_object = obj.versioned_object
        prev_latest_version = versioned_object.versions.exclude(id=obj.id).filter(is_latest_version=True).first()
        try:
            with transaction.atomic():
                cls.pause_indexing()

                obj.is_latest_version = True
                obj.save(**kwargs)
                if obj.id:
                    obj.version = str(obj.id)
                    obj.save()
                    obj.set_locales()
                    obj.clean()  # clean here to validate locales that can only be saved after obj is saved
                    obj.update_versioned_object()
                    if prev_latest_version:
                        prev_latest_version.is_latest_version = False
                        prev_latest_version.save()

                    obj.sources.set(compact([parent, parent_head]))
                    persisted = True
                    cls.resume_indexing()
                    if get(settings, 'TEST_MODE', False):
                        process_hierarchy_for_concept_version(
                            obj.id, get(prev_latest_version, 'id'), parent_concept_uris, create_parent_version)
                    else:
                        process_hierarchy_for_concept_version.delay(
                            obj.id, get(prev_latest_version, 'id'), parent_concept_uris, create_parent_version)

                    def index_all():
                        if prev_latest_version:
                            prev_latest_version.index()
                        obj.index()

                    transaction.on_commit(index_all)
        except ValidationError as err:
            errors.update(err.message_dict)
        finally:
            cls.resume_indexing()
            if not persisted:
                if prev_latest_version:
                    prev_latest_version.is_latest_version = True
                    prev_latest_version.save()
                if obj.id:
                    obj.remove_locales()
                    obj.sources.remove(parent_head)
                    obj.delete()
                errors['non_field_errors'] = [PERSIST_CLONE_ERROR]

        return errors
Example #7
    def predict(self, text, original_text, pos):
        assert self.model is not None, "Please build the NER before using it for prediction"
        #text = text.decode('utf-8')
        results = self.model.extract_entities(text.split())

        def entity_mapping(e):
            score = e[2]
            if score > 0:
                entity_range = e[0]
                return {
                    "entity": " ".join(
                        original_text.split()[entity_range[0]:entity_range[-1] + 1]),
                    "tag": e[1],
                    "score": e[2],
                    "start": entity_range[0],
                    "end": entity_range[-1] + 1
                }

        entities = compact(list(map(entity_mapping, results)))
        return entities, None
Example #8
    def get_csv(self, request, queryset=None):
        filename, url, prepare_new_file, is_member = None, None, True, False

        parent = None  # TODO: fix this for parent (owner)

        if parent:
            prepare_new_file = False
            user = request.query_params.get('user', None)
            is_member = self._is_member(parent, user)

        try:
            path = request.__dict__.get('_request').path
            filename = '_'.join(compact(path.split('/'))).replace('.', '_')
            kwargs = {
                'filename': filename,
            }
        except Exception:  # pylint: disable=broad-except
            kwargs = {}

        if filename and prepare_new_file:
            url = get_csv_from_s3(filename, is_member)

        if not url:
            queryset = queryset or self._get_query_set_from_view(is_member)
            data = self.get_csv_rows(queryset) if hasattr(self, 'get_csv_rows') else queryset.values()
            url = write_csv_to_s3(data, is_member, **kwargs)

        return Response({'url': url}, status=200)
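
The filename derivation above is a common compact idiom: splitting a URL path on '/' produces empty strings for the leading and trailing slashes, which compact drops. A walk-through with a hypothetical path (not from the original request):

    from pydash import compact

    path = '/orgs/OCL/sources/classes/concepts.csv'  # illustrative only
    parts = path.split('/')  # ['', 'orgs', 'OCL', 'sources', 'classes', 'concepts.csv']
    filename = '_'.join(compact(parts)).replace('.', '_')
    assert filename == 'orgs_OCL_sources_classes_concepts_csv'
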
Example #9
    def update(self, request):
        if not self.parent_resource:
            return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED)

        self.object = self.get_object()
        save_kwargs = {
            'force_update': True,
            'parent_resource': self.parent_resource
        }
        success_status_code = status.HTTP_200_OK

        supported_locales = request.data.pop('supported_locales', '')
        if isinstance(supported_locales, str):
            supported_locales = compact(supported_locales.split(','))

        request.data['supported_locales'] = supported_locales
        serializer = self.get_serializer(self.object,
                                         data=request.data,
                                         partial=True)

        if serializer.is_valid():
            self.object = serializer.save(**save_kwargs)
            if serializer.is_valid():
                serializer = self.get_detail_serializer(self.object)
                return Response(serializer.data, status=success_status_code)

        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Example #10
    def predict(self, text, original_text, pos):
        tokens = tokenize_utterance(text)
        entities = self.model.extract_entities(tokens)

        def default_entity_mapping(entity):
            entity_range = entity[0]  # avoid shadowing the builtin range()
            ind = list(entity_range)

            return {
                "tag": entity[1],
                "entity": " ".join(original_text.split()[ind[0]:ind[-1] + 1]),
                "start": ind[0],
                "end": ind[-1] + 1,
                "resolvedTo": {
                    'baseEntity': " ".join(original_text.split()[ind[0]:ind[-1] + 1])
                }
            }

        default_entities = compact(list(map(default_entity_mapping, entities)))

        return default_entities, None
Example #11
    def predict(self, text, original_text, pos):
        assert self.model is not None, "Please build the NER before using it for prediction"
        input_text_pos_tag = pos_tags_predict(text, original_text, language)
        original_text_list = original_text.split()
        input_text_test = sent2features(input_text_pos_tag)
        entities_list = self.model.predict_marginals_single(input_text_test)

        def entity_mapping(indx):
            tag = max(iter(entities_list[indx].items()),
                      key=operator.itemgetter(1))[0]

            if tag != "O":
                return {
                    "entity": original_text_list[indx],  # input_text_pos_tag[indx][0]
                    "tag": tag,
                    "score": entities_list[indx][tag],
                    "start": indx,
                    "end": indx + 1
                }

        entities = compact(
            [entity_mapping(i) for i in range(len(input_text_pos_tag))])
        entities = format_response(entities)

        return entities, None
Example #12
    def prepare(self, utterances):
        logger = logging.getLogger(__name__)
        predefined_tags = get_predefined_entities(self.serviceid)

        def extract_entity_names(entry_each):
            return get(entry_each, "entity")

        patterns = list(
            map(extract_entity_names, get_pattern_entities(self.serviceid)))
        phrases = list(
            map(extract_entity_names, get_phrase_entities(self.serviceid)))
        label_list = []

        def get_sample(data):
            assert 'mapping' in data, "Token mapping missing from training data"
            assert "utterance" in data, "Utterance text missing from training data"
            try:
                utterance = get(data, "case_converted_utterance")
                logger.debug("Preparing utterance: %s" % utterance)
                mapping = json.loads(get(data, "mapping"))
                assert "tags" in mapping, "Tags missing from training data"
                tags = get(mapping, 'tags')
                tokens = utterance.split()
                sample = ner_training_instance(tokens)
                for tag in tags:
                    start = get(tag, 'start')
                    end = get(tag, 'end')
                    label = get(tag, 'tag')
                    label = str(label)  # keep as str; the old .encode('utf-8') broke the membership checks below on Python 3
                    # ignoreTag = (label.upper() in predefined_tags)
                    ignoreTag = (label.upper() in predefined_tags) \
                                or (label in patterns) or (label in phrases)
                    if not ignoreTag:
                        assert all(v is not None for v in [start, end, label]), \
                            "Missing information for adding entities to training"
                        logger.info("Adding entity: %s" % label)
                        logger.info("Start range: %s" % start)
                        logger.info("End range: %s" % end)
                        sample.add_entity(range(start, end), label.upper())
                        if not label.upper() in label_list:
                            label_list.append(label.upper())
                        logger.info("label_list %s" % (label_list))
                data['ner_trained'] = True
                return sample, data
            except Exception:  # TypeError was redundant here; Exception already covers it
                data['ner_trained'] = False
                return None, data

        assert len(utterances) > 0, "Not enough utterances for training"
        results = list(map(get_sample, utterances))
        assert len(label_list) > 0, \
            "Unable to do entity training as no custom entities are mapped. Please map at least 2 custom entities to proceed."
        assert len(label_list) > 1, \
            "At least 2 custom entities are mandatory to perform entity training. Please add one more custom entity to proceed."
        samples = compact([items[0] for items in results])
        trained_utterances = [items[1] for items in results]
        return samples, trained_utterances
Example #13
    def create_new_version_for(cls, instance, data, user):
        instance.concept_class = data.get('concept_class',
                                          instance.concept_class)
        instance.datatype = data.get('datatype', instance.datatype)
        instance.extras = data.get('extras', instance.extras)
        instance.external_id = data.get('external_id', instance.external_id)
        instance.comment = data.get('update_comment') or data.get('comment')
        instance.retired = data.get('retired', instance.retired)

        new_names = LocalizedText.build_locales(data.get('names', []))
        new_descriptions = LocalizedText.build_locales(
            data.get('descriptions', []), 'description')

        instance.cloned_names = compact(new_names)
        instance.cloned_descriptions = compact(new_descriptions)

        return cls.persist_clone(instance, user)
Example #14
    def persist_clone(cls, obj, user=None, **kwargs):  # pylint: disable=too-many-statements
        errors = dict()
        if not user:
            errors['version_created_by'] = PERSIST_CLONE_SPECIFY_USER_ERROR
            return errors
        obj.created_by = user
        obj.updated_by = user
        obj.version = TEMP
        parent = obj.parent
        parent_head = parent.head
        persisted = False
        latest_version = None
        try:
            with transaction.atomic():
                cls.pause_indexing()

                obj.is_latest_version = True
                obj.save(**kwargs)
                obj.version = str(obj.id)
                obj.save()
                if obj.id:
                    obj.set_locales()
                    # clean here to validate locales that can only be saved after obj is saved
                    obj.clean()
                    obj.update_versioned_object()
                    versioned_object = obj.versioned_object
                    latest_version = versioned_object.versions.exclude(
                        id=obj.id).filter(is_latest_version=True).first()
                    if latest_version:  # a brand-new concept has no previous latest version
                        latest_version.is_latest_version = False
                        latest_version.save()
                    obj.sources.set(compact([parent, parent_head]))

                    persisted = True
                    cls.resume_indexing()

                    def index_all():
                        parent.save()
                        parent_head.save()
                        if latest_version:
                            latest_version.save()
                        obj.save()

                    transaction.on_commit(index_all)

        except ValidationError as err:
            errors.update(err.message_dict)
        finally:
            cls.resume_indexing()
            if not persisted:
                if latest_version:
                    latest_version.is_latest_version = True
                    latest_version.save()
                if obj.id:
                    obj.remove_locales()
                    obj.sources.remove(parent_head)
                    obj.delete()
                errors['non_field_errors'] = [PERSIST_CLONE_ERROR]

        return errors
Example #15
    def post(self, _, resource):
        model = get_resource_class_from_resource_name(resource)

        if not model:
            return Response(status=status.HTTP_404_NOT_FOUND)

        ids = self.request.data.get('ids', None)
        if ids:
            ids = compact([i.strip() for i in compact(ids.split(','))])

        if not ids:
            return Response(status=status.HTTP_400_BAD_REQUEST)

        for instance in model.objects.filter(
                **{"{}__in".format(model.mnemonic_attr): ids}):
            instance.save()

        return Response(status=status.HTTP_202_ACCEPTED)
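
The doubled compact around the comma-split ids performs two cleanup passes: the inner call drops empty segments left by stray commas, and the outer call drops entries that become empty only after stripping. A small illustration:

    from pydash import compact

    ids = 'c1, ,c2,,c3 '
    inner = compact(ids.split(','))  # ['c1', ' ', 'c2', 'c3 '] -- the '' segment is gone
    cleaned = compact([i.strip() for i in inner])
    assert cleaned == ['c1', 'c2', 'c3']
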
Example #16
    def update_mappings(self):
        from core.mappings.models import Mapping
        uris = compact([self.uri, self.canonical_url])
        for mapping in Mapping.objects.filter(to_source__isnull=True, to_source_url__in=uris):
            mapping.to_source = self
            mapping.save()

        for mapping in Mapping.objects.filter(from_source__isnull=True, from_source_url__in=uris):
            mapping.from_source = self
            mapping.save()
Example #17
 def is_processing(self):
     background_ids = compact(self._background_process_ids)
     if background_ids:
         for process_id in background_ids.copy():
             res = AsyncResult(process_id)
             if res.successful() or res.failed():
                 self.remove_processing(process_id)
             else:
                 return True
     return bool(self._background_process_ids)
Example #18
    def create_new_version_for(cls, instance, data, user, create_parent_version=True):
        instance.concept_class = data.get('concept_class', instance.concept_class)
        instance.datatype = data.get('datatype', instance.datatype)
        instance.extras = data.get('extras', instance.extras)
        instance.external_id = data.get('external_id', instance.external_id)
        instance.comment = data.get('update_comment') or data.get('comment')
        instance.retired = data.get('retired', instance.retired)

        new_names = LocalizedText.build_locales(data.get('names', []))
        new_descriptions = LocalizedText.build_locales(data.get('descriptions', []), 'description')
        has_parent_concept_uris_attr = 'parent_concept_urls' in data
        parent_concept_uris = data.pop('parent_concept_urls', None)

        instance.cloned_names = compact(new_names)
        instance.cloned_descriptions = compact(new_descriptions)

        if not parent_concept_uris and has_parent_concept_uris_attr:
            parent_concept_uris = []

        return cls.persist_clone(instance, user, create_parent_version, parent_concept_uris)
Example #19
 def queue_tasks(self, part_list, is_child):
     chunked_lists = compact(
         self.chunker_list(part_list, self.parallel) if is_child else [part_list])
     jobs = group(
         bulk_import_parts_inline.s(_list, self.username,
                                    self.update_if_exists)
         for _list in chunked_lists)
     group_result = jobs.apply_async(queue='concurrent')
     self.groups.append(group_result)
     self.tasks += group_result.results
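
chunker_list is not shown in this excerpt; a plausible implementation matching the usage would split a list into parallel round-robin chunks, with compact then discarding the empty chunks produced when parallel exceeds the list length. A sketch under that assumption:

    def chunker_list(seq, size):
        # round-robin split into size chunks; some may come back empty
        return [seq[i::size] for i in range(size)]

    assert chunker_list([1, 2, 3, 4, 5], 2) == [[1, 3, 5], [2, 4]]
    assert chunker_list([1], 3) == [[1], [], []]  # compact() would keep only [[1]]
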
Example #20
    def persist_clone(cls, obj, user=None, **kwargs):
        errors = dict()
        if not user:
            errors['version_created_by'] = PERSIST_CLONE_SPECIFY_USER_ERROR
            return errors
        obj.version = obj.version or generate_temp_version()
        obj.created_by = user
        obj.updated_by = user
        parent = obj.parent
        parent_head = parent.head
        persisted = False
        prev_latest_version = None
        try:
            with transaction.atomic():
                cls.pause_indexing()

                obj.is_latest_version = True
                obj.save(**kwargs)
                if obj.id:
                    obj.version = str(obj.id)
                    obj.save()
                    obj.update_versioned_object()
                    versioned_object = obj.versioned_object
                    prev_latest_version = versioned_object.versions.exclude(id=obj.id).filter(
                        is_latest_version=True).first()
                    if prev_latest_version:
                        prev_latest_version.is_latest_version = False
                        prev_latest_version.save()

                    obj.sources.set(compact([parent, parent_head]))
                    persisted = True
                    cls.resume_indexing()

                    def index_all():
                        if prev_latest_version:
                            prev_latest_version.index()
                        obj.index()

                    transaction.on_commit(index_all)
        except ValidationError as err:
            errors.update(err.message_dict)
        finally:
            cls.resume_indexing()
            if not persisted:
                if obj.id:
                    obj.sources.remove(parent_head)
                    if prev_latest_version:
                        prev_latest_version.is_latest_version = True
                        prev_latest_version.save()
                    obj.delete()
                errors['non_field_errors'] = [PERSIST_CLONE_ERROR]

        return errors
Example #21
    def update_mappings(self):
        from core.mappings.models import Mapping
        parent_uris = compact([self.parent.uri, self.parent.canonical_url])
        for mapping in Mapping.objects.filter(to_concept_code=self.mnemonic,
                                              to_source_url__in=parent_uris,
                                              to_concept__isnull=True):
            mapping.to_concept = self
            mapping.save()

        for mapping in Mapping.objects.filter(from_concept_code=self.mnemonic,
                                              from_source_url__in=parent_uris,
                                              from_concept__isnull=True):
            mapping.from_concept = self
            mapping.save()
Example #22
    def run(self) -> Tuple[dict, List[dict]]:  # Tuple from typing, imported alongside List
        """
        Get the hardware information.

        This method returns *almost* DeviceHub ready information in a
        tuple, where the first element is information related to the
        overall machine, like the S/N of the computer, and the second
        item is a list of hardware information per component.
        """
        computer = self.computer()
        components = chain(self.processors(), self.ram_modules(),
                           self.hard_drives(), self.graphic_cards(),
                           [self.motherboard()], self.network_adapters(),
                           self.sound_cards())
        return computer, compact(components)
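
Note that components is an itertools.chain iterator rather than a list; compact only needs an iterable, and it also filters out a None component (for instance, a machine where motherboard() returned nothing). A sketch assuming compact has pydash-style semantics:

    from itertools import chain

    def compact(array):
        # pydash-style compact: keep only truthy items from any iterable
        return [item for item in array if item]

    components = chain([{'type': 'Processor'}], [None], [{'type': 'RAMModule'}])
    assert compact(components) == [{'type': 'Processor'}, {'type': 'RAMModule'}]
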
Example #23
def self_desc(cls, omit=None):
    '''Method to get self description, used at init.'''
    desc_list = [f'{get_class_name(cls)}:']
    omit_list = ps.compact(cast_list(omit))
    for k, v in get_class_attr(cls).items():
        if k in omit_list:
            continue
        if k == 'spec':  # spec components are described at their object level; for session, only desc spec.meta
            desc_v = pformat(v['meta'])
        elif ps.is_dict(v) or ps.is_dict(ps.head(v)):
            desc_v = pformat(v)
        else:
            desc_v = v
        desc_list.append(f'- {k} = {desc_v}')
    desc = '\n'.join(desc_list)
    return desc
Example #24
def monthly_chart_data(queryset: QuerySet) -> Dict:
    series_queryset = EventType.objects.filter(
        events__in=queryset).distinct().order_by("name").values(
            "name", "colour")
    series_dict = {
        item["name"]: {
            "name": item["name"],
            "color": item["colour"],
            "data": []
        }
        for item in series_queryset
    }

    chart_data: Dict[str, Any] = {"categories": [], "series": []}
    data_queryset = (queryset.annotate(
        datetime=TruncMonth("start_datetime")).order_by(
            "datetime", "type__name").values(
                "datetime",
                "type__name").distinct().annotate(count=Count("id")))
    data_dict = [{
        **item, "month": item["datetime"].strftime("%Y-%m")
    } for item in data_queryset]

    structured_data_dict = pydash.group_by(data_dict, "month")
    for month, data in structured_data_dict.items():
        structured_data_dict[month] = pydash.group_by(data, "type__name")

    chart_data["categories"] = list(structured_data_dict.keys())
    for category in chart_data["categories"]:
        event_types = structured_data_dict[category].keys()
        for event_type, serie in series_dict.items():
            if event_type in event_types:
                serie["data"].append(
                    structured_data_dict[category][event_type][0]["count"])
            else:
                serie["data"].append(None)
    chart_data["series"] = list(series_dict.values())

    average_data = []
    for index in range(len(chart_data["categories"])):
        truthy_values = pydash.compact(
            [serie["data"][index] for serie in chart_data["series"]])
        average_data.append(sum(truthy_values))
    chart_data["average"] = mean(average_data) if len(average_data) else None

    return chart_data
Example #25
def prepath_to_idxs(prepath):
    '''Extract trial index and session index from prepath if available'''
    _, _, prename, spec_name, _, _ = prepath_split(prepath)
    idxs_tail = prename.replace(spec_name, '').strip('_')
    idxs_strs = ps.compact(idxs_tail.split('_')[:2])
    if ps.is_empty(idxs_strs):
        return None, None
    tidx = idxs_strs[0]
    assert tidx.startswith('t')
    trial_index = int(tidx.strip('t'))
    if len(idxs_strs) == 1:  # trial index only, no session
        session_index = None
    else:
        sidx = idxs_strs[1]
        assert sidx.startswith('s')
        session_index = int(sidx.strip('s'))
    return trial_index, session_index
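
A walk-through of the index extraction, with a prepath layout assumed from the function's own logic (not verified against prepath_split):

    # assume prename = 'dqn_cartpole_t0_s1' and spec_name = 'dqn_cartpole'
    # idxs_tail -> 't0_s1' after stripping spec_name and surrounding '_'
    # idxs_strs -> ['t0', 's1']; compact() drops '' when the tail is empty
    # result: (0, 1); a trial-only prepath ('t0') yields (0, None)
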
Example #26
    def persist_clone(cls, obj, user=None, **kwargs):
        errors = dict()
        if not user:
            errors['version_created_by'] = \
                'Must specify which user is attempting to create a new concept version.'
            return errors
        obj.created_by = user
        obj.version = TEMP
        parent = obj.parent
        parent_head = parent.head
        persisted = False
        errored_action = 'saving new concept version'
        latest_versions = None
        try:
            obj.is_latest_version = True
            obj.save(**kwargs)
            obj.version = str(obj.id)
            obj.save()
            obj.set_locales()
            # clean here to validate locales that can only be saved after obj is saved
            obj.clean()
            latest_versions = obj.versions.exclude(id=obj.id).filter(
                is_latest_version=True)
            latest_versions.update(is_latest_version=False)
            obj.sources.set(compact([parent, parent_head]))

            # to update counts
            parent.save()
            parent_head.save()

            persisted = True
        except ValidationError as err:
            errors.update(err.message_dict)
        finally:
            if not persisted:
                obj.remove_locales()
                obj.sources.remove(parent_head)
                if latest_versions:
                    latest_versions.update(is_latest_version=True)
                if obj.id:
                    obj.delete()
                errors['non_field_errors'] = [
                    'An error occurred while %s.' % errored_action
                ]

        return errors
Example #27
def clean(text):
    """Trim and replace multiple spaces with a single space.

    Args:
        text (str): String to clean.

    Returns:
        str: Cleaned string.

    Example:

        >>> clean('a  b   c    d')
        'a b c d'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return ' '.join(pyd.compact(text.split()))
Example #28
def clean(text):
    """Trim and replace multiple spaces with a single space.

    Args:
        text (str): String to clean.

    Returns:
        str: Cleaned string.

    Example:

        >>> clean('a  b   c    d')
        'a b c d'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return " ".join(pyd.compact(text.split()))
Example #29
    def persist_clone(cls, obj, user=None, **kwargs):
        errors = dict()
        if not user:
            errors['version_created_by'] = \
                "Must specify which user is attempting to create a new {} version.".format(
                    cls.get_resource_url_kwarg())
            return errors
        obj.version = TEMP
        obj.created_by = user
        parent = obj.parent
        parent_head = parent.head
        persisted = False
        errored_action = 'saving new mapping version'
        latest_versions = None
        try:
            obj.is_latest_version = True
            obj.full_clean()
            obj.save(**kwargs)
            obj.version = str(obj.id)
            obj.save()
            latest_versions = obj.versions.exclude(id=obj.id).filter(
                is_latest_version=True)
            latest_versions.update(is_latest_version=False)
            obj.sources.set(compact([parent, parent_head]))

            # to update counts
            parent.save()
            parent_head.save()

            persisted = True
        except ValidationError as err:
            errors.update(err.message_dict)
        finally:
            if not persisted:
                obj.sources.remove(parent_head)
                if latest_versions:
                    latest_versions.update(is_latest_version=True)
                if obj.id:
                    obj.delete()
                errors['non_field_errors'] = [
                    'An error occurred while %s.' % errored_action
                ]

        return errors
Example #30
def number_format(number, scale=0, decimal_separator=".", order_separator=","):
    """Format a number to scale with custom decimal and order separators.

    Args:
        number (int|float): Number to format.
        scale (int, optional): Number of decimals to include. Defaults to
            ``0``.
        decimal_separator (str, optional): Decimal separator to use. Defaults
            to ``'.'``.
        order_separator (str, optional): Order separator to use. Defaults to
            ``','``.

    Returns:
        str: Formatted number as string.

    Example:

        >>> number_format(1234.5678)
        '1,235'
        >>> number_format(1234.5678, 2, ',', '.')
        '1.234,57'

    .. versionadded:: 3.0.0
    """
    # Create a string formatter which converts number to the appropriately
    # scaled representation.
    fmt = "{{0:.{0:d}f}}".format(scale)

    try:
        num_parts = fmt.format(number).split(".")
    except ValueError:
        text = ""
    else:
        int_part = num_parts[0]
        dec_part = (num_parts + [""])[1]

        # Reverse the integer part, chop it into groups of 3, join on
        # `order_separator`, and then unreverse the string.
        int_part = order_separator.join(chop(int_part[::-1], 3))[::-1]

        text = decimal_separator.join(pyd.compact([int_part, dec_part]))

    return text
Example #31
def number_format(number, scale=0, decimal_separator='.', order_separator=','):
    """Format a number to scale with custom decimal and order separators.

    Args:
        number (int|float): Number to format.
        scale (int, optional): Number of decimals to include. Defaults to
            ``0``.
        decimal_separator (str, optional): Decimal separator to use. Defaults
            to ``'.'``.
        order_separator (str, optional): Order separator to use. Defaults to
            ``','``.

    Returns:
        str: Formatted number as string.

    Example:

        >>> number_format(1234.5678)
        '1,235'
        >>> number_format(1234.5678, 2, ',', '.')
        '1.234,57'

    .. versionadded:: 3.0.0
    """
    # Create a string formatter which converts number to the appropriately
    # scaled representation.
    fmt = '{{0:.{0:d}f}}'.format(scale)

    try:
        num_parts = fmt.format(number).split('.')
    except ValueError:
        text = ''
    else:
        int_part = num_parts[0]
        dec_part = (num_parts + [''])[1]

        # Reverse the integer part, chop it into groups of 3, join on
        # `order_separator`, and then unreverse the string.
        int_part = order_separator.join(chop(int_part[::-1], 3))[::-1]

        text = decimal_separator.join(pyd.compact([int_part, dec_part]))

    return text
Example #32
    def predict(self, text, original_text, pos):
        if not isinstance(text, str):
            text = str(text, "utf-8")
        doc = self.model(text)
        logger.info("english predict")

        def default_entity_mapping(entity):
            return {
                "tag": entity.label_,
                "entity": " ".join(original_text.split()[entity.start:entity.end]),
                "start": entity.start,
                "end": entity.end,
                "resolvedTo": {
                    'baseEntity': " ".join(original_text.split()[entity.start:entity.end])
                }
            }

        default_entities = compact(list(map(default_entity_mapping, doc.ents)))
        pos_mapping = []
        if pos is not None:

            def default_pos_mapping(word):
                original_text_tokens = original_text.split()
                text_tokens = text.split()
                word_text = word.text
                word_index = text_tokens.index(word_text)
                return {
                    "text": original_text_tokens[word_index]
                    if word_index < len(original_text_tokens) else word_text,
                    "lemma": word.lemma_,
                    "tag": word.tag_,
                    "pos": word.pos_
                }

            pos_mapping = list(map(default_pos_mapping, doc))
        return default_entities, pos_mapping
Example #33
def test_compact(case, expected):
    assert _.compact(case) == expected
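
This last test is presumably driven by a pytest parametrize decorator that the excerpt omits; a self-contained sketch with illustrative cases, where _ is assumed to be pydash imported as _:

    import pytest
    import pydash as _

    @pytest.mark.parametrize('case,expected', [
        ([0, 1, False, 2, '', 3], [1, 2, 3]),  # falsey values removed
        ([None, None], []),
        ([], []),
    ])
    def test_compact(case, expected):
        assert _.compact(case) == expected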