def update(self, instance, validated_data):
    instance.concept_class = validated_data.get('concept_class', instance.concept_class)
    instance.datatype = validated_data.get('datatype', instance.datatype)
    instance.extras = validated_data.get('extras', instance.extras)
    instance.external_id = validated_data.get('external_id', instance.external_id)
    instance.comment = validated_data.get('update_comment') or validated_data.get('comment')
    instance.retired = validated_data.get('retired', instance.retired)
    new_names = [
        LocalizedText(
            **{k: v for k, v in name.items() if k not in ['name_type']}
        ) for name in validated_data.get('names', [])
    ]
    new_descriptions = [
        LocalizedText(
            **{k: v for k, v in desc.items() if k not in ['description_type']}
        ) for desc in validated_data.get('descriptions', [])
    ]
    instance.cloned_names = compact(new_names)
    instance.cloned_descriptions = compact(new_descriptions)
    errors = Concept.persist_clone(instance, self.context.get('request').user)
    if errors:
        self._errors.update(errors)
    return instance
def make_parts(self):
    prev_line = None
    orgs = self.resource_distribution.get('Organization', None)
    sources = self.resource_distribution.get('Source', None)
    collections = self.resource_distribution.get('Collection', None)

    if orgs:
        self.parts = [orgs]
    if sources:
        self.parts.append(sources)
    if collections:
        self.parts.append(collections)

    self.parts = compact(self.parts)
    self.parts.append([])

    for data in self.input_list:
        line = json.loads(data)
        data_type = line.get('type', None).lower()
        if data_type not in ['organization', 'source', 'collection']:
            if prev_line:
                prev_type = prev_line.get('type').lower()
                if prev_type == data_type or (
                        data_type not in ['concept', 'mapping'] and prev_type not in ['concept', 'mapping']):
                    self.parts[-1].append(line)
                else:
                    self.parts.append([line])
            else:
                self.parts[-1].append(line)

            prev_line = line

    self.parts = compact(self.parts)
def predict(self, text, original_text, pos):
    doc = self.model(text)

    def default_entity_mapping(entity):
        return {
            "tag": entity.label_,
            "entity": " ".join(original_text.split()[entity.start:entity.end]),
            "start": entity.start,
            "end": entity.end
        }

    default_entities = compact(list(map(default_entity_mapping, doc.ents)))

    pos_mapping = []
    if pos is not None:
        def default_pos_mapping(word):
            original_text_tokens = original_text.split()
            text_tokens = text.split()
            word_text = word.text
            word_index = text_tokens.index(word_text)
            return {
                "text": original_text_tokens[word_index] if word_index < len(original_text_tokens) else word_text,
                "lemma": word.lemma_,
                "tag": word.tag_,
                "pos": word.pos_
            }

        pos_mapping = list(map(default_pos_mapping, doc))

    return default_entities, pos_mapping
def create(self, request, **kwargs):  # pylint: disable=unused-argument
    if not self.parent_resource:
        return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED)

    permission = HasOwnership()
    if not permission.has_object_permission(request, self, self.parent_resource):
        return Response(status=status.HTTP_403_FORBIDDEN)

    serializer = self.get_serializer(
        data={
            'mnemonic': request.data.get('id'),
            'supported_locales': compact(request.data.pop('supported_locales', '').split(',')),
            'version': HEAD,
            **request.data,
            **{self.parent_resource.resource_type.lower(): self.parent_resource.id}
        })
    if serializer.is_valid():
        instance = serializer.save(force_insert=True)
        if serializer.is_valid():
            headers = self.get_success_headers(serializer.data)
            serializer = self.get_detail_serializer(instance)
            return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)

    return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
def saved_unsaved_names(self):
    unsaved_names = get(self, 'cloned_names', [])
    if self.id:
        return compact([*list(self.names.all()), *unsaved_names])
    return unsaved_names
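# Hedged illustration of the merge above, assuming `compact` here is
# pydash.compact (it drops falsy entries). The string stand-ins are
# hypothetical; the real lists hold LocalizedText instances.
from pydash import compact

saved_names = ['Fever', 'Pyrexia']   # stand-in for list(self.names.all())
cloned_names = [None, 'Febrile']     # a falsy cloned entry is dropped
assert compact([*saved_names, *cloned_names]) == ['Fever', 'Pyrexia', 'Febrile']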
def persist_clone(cls, obj, user=None, create_parent_version=True, parent_concept_uris=None, **kwargs):  # pylint: disable=too-many-statements
    errors = dict()
    if not user:
        errors['version_created_by'] = PERSIST_CLONE_SPECIFY_USER_ERROR
        return errors

    obj.created_by = user
    obj.updated_by = user
    obj.version = obj.version or generate_temp_version()
    parent = obj.parent
    parent_head = parent.head
    persisted = False
    versioned_object = obj.versioned_object
    prev_latest_version = versioned_object.versions.exclude(id=obj.id).filter(is_latest_version=True).first()
    try:
        with transaction.atomic():
            cls.pause_indexing()

            obj.is_latest_version = True
            obj.save(**kwargs)
            if obj.id:
                obj.version = str(obj.id)
                obj.save()
                obj.set_locales()
                obj.clean()  # clean here to validate locales that can only be saved after obj is saved
                obj.update_versioned_object()
                if prev_latest_version:
                    prev_latest_version.is_latest_version = False
                    prev_latest_version.save()
                obj.sources.set(compact([parent, parent_head]))
                persisted = True
                cls.resume_indexing()

                if get(settings, 'TEST_MODE', False):
                    process_hierarchy_for_concept_version(
                        obj.id, get(prev_latest_version, 'id'), parent_concept_uris, create_parent_version)
                else:
                    process_hierarchy_for_concept_version.delay(
                        obj.id, get(prev_latest_version, 'id'), parent_concept_uris, create_parent_version)

                def index_all():
                    if prev_latest_version:
                        prev_latest_version.index()
                    obj.index()

                transaction.on_commit(index_all)
    except ValidationError as err:
        errors.update(err.message_dict)
    finally:
        cls.resume_indexing()
        if not persisted:
            if prev_latest_version:
                prev_latest_version.is_latest_version = True
                prev_latest_version.save()
            if obj.id:
                obj.remove_locales()
                obj.sources.remove(parent_head)
                obj.delete()
            errors['non_field_errors'] = [PERSIST_CLONE_ERROR]

    return errors
def predict(self, text, original_text, pos):
    assert self.model is not None, "Please build the NER before using it for prediction"
    # text = text.decode('utf-8')
    results = self.model.extract_entities(text.split())

    def entity_mapping(e):
        score = e[2]
        if score > 0:
            entity_range = e[0]
            return {
                "entity": " ".join(original_text.split()[entity_range[0]:entity_range[-1] + 1]),
                "tag": e[1],
                "score": e[2],
                "start": entity_range[0],
                "end": entity_range[-1] + 1
            }

    entities = compact(list(map(entity_mapping, results)))
    return entities, None
def get_csv(self, request, queryset=None):
    filename, url, prepare_new_file, is_member = None, None, True, False
    parent = None  # TODO: fix this for parent (owner)
    if parent:
        prepare_new_file = False
        user = request.query_params.get('user', None)
        is_member = self._is_member(parent, user)
    try:
        path = request.__dict__.get('_request').path
        filename = '_'.join(compact(path.split('/'))).replace('.', '_')
        kwargs = {
            'filename': filename,
        }
    except Exception:  # pylint: disable=broad-except
        kwargs = {}

    if filename and prepare_new_file:
        url = get_csv_from_s3(filename, is_member)

    if not url:
        queryset = queryset or self._get_query_set_from_view(is_member)
        data = self.get_csv_rows(queryset) if hasattr(self, 'get_csv_rows') else queryset.values()
        url = write_csv_to_s3(data, is_member, **kwargs)

    return Response({'url': url}, status=200)
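# Hedged illustration of the filename derivation above, again assuming pydash's
# compact: the leading '/' in the request path produces an empty first token,
# which compact removes. The path below is hypothetical.
from pydash import compact

path = '/orgs/MyOrg/sources/MySource/concepts.csv'
filename = '_'.join(compact(path.split('/'))).replace('.', '_')
assert filename == 'orgs_MyOrg_sources_MySource_concepts_csv'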
def update(self, request):
    if not self.parent_resource:
        return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED)

    self.object = self.get_object()
    save_kwargs = {
        'force_update': True,
        'parent_resource': self.parent_resource
    }
    success_status_code = status.HTTP_200_OK

    supported_locales = request.data.pop('supported_locales', '')
    if isinstance(supported_locales, str):
        supported_locales = compact(supported_locales.split(','))
    request.data['supported_locales'] = supported_locales

    serializer = self.get_serializer(self.object, data=request.data, partial=True)

    if serializer.is_valid():
        self.object = serializer.save(**save_kwargs)
        if serializer.is_valid():
            serializer = self.get_detail_serializer(self.object)
            return Response(serializer.data, status=success_status_code)

    return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
def predict(self, text, original_text, pos):
    tokens = tokenize_utterance(text)
    entities = self.model.extract_entities(tokens)

    def default_entity_mapping(entity):
        entity_range = entity[0]  # renamed from `range` to avoid shadowing the builtin
        ind = list(entity_range)  # materialize the token index range
        return {
            "tag": entity[1],
            "entity": " ".join(original_text.split()[entity_range[0]:entity_range[-1] + 1]),
            "start": ind[0],
            "end": ind[-1] + 1,
            "resolvedTo": {
                'baseEntity': " ".join(original_text.split()[entity_range[0]:entity_range[-1] + 1])
            }
        }

    default_entities = compact(list(map(default_entity_mapping, entities)))
    return default_entities, None
def predict(self, text, original_text, pos):
    assert self.model is not None, "Please build the NER before using it for prediction"
    input_text_pos_tag = pos_tags_predict(text, original_text, language)
    original_text_list = original_text.split()
    input_text_test = sent2features(input_text_pos_tag)
    entities_list = self.model.predict_marginals_single(input_text_test)

    def entity_mapping(indx):
        tag = max(iter(entities_list[indx].items()), key=operator.itemgetter(1))[0]
        if tag != "O":
            return {
                "entity": original_text_list[indx],  # input_text_pos_tag[indx][0],
                "tag": tag,
                "score": entities_list[indx][tag],
                "start": indx,
                "end": indx + 1
            }

    entities = compact(
        list(map(entity_mapping, list(range(0, len(input_text_pos_tag))))))
    entities = format_response(entities)
    return entities, None
def prepare(self, utterances):
    logger = logging.getLogger(__name__)
    predefined_tags = get_predefined_entities(self.serviceid)

    def extract_entity_names(entry_each):
        return get(entry_each, "entity")

    patterns = list(map(extract_entity_names, get_pattern_entities(self.serviceid)))
    phrases = list(map(extract_entity_names, get_phrase_entities(self.serviceid)))
    label_list = []

    def get_sample(data):
        assert 'mapping' in data, "Token mapping missing from training data"
        assert "utterance" in data, "Utterance text missing from training data"
        try:
            utterance = get(data, "case_converted_utterance")
            logger.debug("Preparing utterance: %s" % utterance)
            mapping = json.loads(get(data, "mapping"))
            assert "tags" in mapping, "Tags missing from training data"
            tags = get(mapping, 'tags')
            tokens = utterance.split()
            sample = ner_training_instance(tokens)
            for tag in tags:
                start = get(tag, 'start')
                end = get(tag, 'end')
                label = get(tag, 'tag')
                label = label.encode('utf-8')
                ignoreTag = (label.upper() in predefined_tags) \
                    or (label in patterns) or (label in phrases)
                if not ignoreTag:
                    assert all(v is not None for v in [start, end, label]), \
                        "Missing information for adding entities to training"
                    logger.info("Adding entity: %s" % label)
                    logger.info("Start range: %s" % start)
                    logger.info("End range: %s" % end)
                    sample.add_entity(range(start, end), label.upper())
                    if not label.upper() in label_list:
                        label_list.append(label.upper())
                        logger.info("label_list %s" % (label_list))
            data['ner_trained'] = True
            return sample, data
        except Exception:  # broad catch: mark the utterance as not trained and skip it
            data['ner_trained'] = False
            return None, data

    assert len(utterances) > 0, "Not enough utterances for training"
    results = list(map(get_sample, utterances))
    assert len(label_list) > 0, \
        "Unable to do entity training as no custom entities are mapped. Please map at least 2 custom entities to proceed."
    assert len(label_list) > 1, \
        "At least 2 custom entities are mandatory to perform entity training. Please add one more custom entity to proceed."
    samples = compact([items[0] for items in results])
    trained_utterances = [items[1] for items in results]
    return samples, trained_utterances
def create_new_version_for(cls, instance, data, user):
    instance.concept_class = data.get('concept_class', instance.concept_class)
    instance.datatype = data.get('datatype', instance.datatype)
    instance.extras = data.get('extras', instance.extras)
    instance.external_id = data.get('external_id', instance.external_id)
    instance.comment = data.get('update_comment') or data.get('comment')
    instance.retired = data.get('retired', instance.retired)

    new_names = LocalizedText.build_locales(data.get('names', []))
    new_descriptions = LocalizedText.build_locales(data.get('descriptions', []), 'description')

    instance.cloned_names = compact(new_names)
    instance.cloned_descriptions = compact(new_descriptions)

    return cls.persist_clone(instance, user)
def persist_clone(cls, obj, user=None, **kwargs):  # pylint: disable=too-many-statements
    errors = dict()
    if not user:
        errors['version_created_by'] = PERSIST_CLONE_SPECIFY_USER_ERROR
        return errors

    obj.created_by = user
    obj.updated_by = user
    obj.version = TEMP
    parent = obj.parent
    parent_head = parent.head
    persisted = False
    latest_version = None
    try:
        with transaction.atomic():
            cls.pause_indexing()

            obj.is_latest_version = True
            obj.save(**kwargs)
            obj.version = str(obj.id)
            obj.save()
            if obj.id:
                obj.set_locales()
                obj.clean()  # clean here to validate locales that can only be saved after obj is saved
                obj.update_versioned_object()
                versioned_object = obj.versioned_object
                latest_version = versioned_object.versions.exclude(id=obj.id).filter(is_latest_version=True).first()
                latest_version.is_latest_version = False
                latest_version.save()
                obj.sources.set(compact([parent, parent_head]))
                persisted = True
                cls.resume_indexing()

                def index_all():
                    parent.save()
                    parent_head.save()
                    latest_version.save()
                    obj.save()

                transaction.on_commit(index_all)
    except ValidationError as err:
        errors.update(err.message_dict)
    finally:
        cls.resume_indexing()
        if not persisted:
            if latest_version:
                latest_version.is_latest_version = True
                latest_version.save()
            if obj.id:
                obj.remove_locales()
                obj.sources.remove(parent_head)
                obj.delete()
            errors['non_field_errors'] = [PERSIST_CLONE_ERROR]

    return errors
def post(self, _, resource):
    model = get_resource_class_from_resource_name(resource)
    if not model:
        return Response(status=status.HTTP_404_NOT_FOUND)

    ids = self.request.data.get('ids', None)
    if ids:
        ids = compact([i.strip() for i in compact(ids.split(','))])
    if not ids:
        return Response(status=status.HTTP_400_BAD_REQUEST)

    for instance in model.objects.filter(**{"{}__in".format(model.mnemonic_attr): ids}):
        instance.save()

    return Response(status=status.HTTP_202_ACCEPTED)
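# Hedged sketch of the id parsing above: the inner compact drops empty tokens
# from stray commas, the outer one drops ids that become empty after strip().
# Assumes pydash.compact semantics; the input string is hypothetical.
from pydash import compact

ids = 'A-1, B-2,, C-3 ,  '
assert compact([i.strip() for i in compact(ids.split(','))]) == ['A-1', 'B-2', 'C-3']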
def update_mappings(self):
    from core.mappings.models import Mapping

    uris = compact([self.uri, self.canonical_url])

    for mapping in Mapping.objects.filter(to_source__isnull=True, to_source_url__in=uris):
        mapping.to_source = self
        mapping.save()

    for mapping in Mapping.objects.filter(from_source__isnull=True, from_source_url__in=uris):
        mapping.from_source = self
        mapping.save()
def is_processing(self):
    background_ids = compact(self._background_process_ids)
    if background_ids:
        for process_id in background_ids.copy():
            res = AsyncResult(process_id)
            if res.successful() or res.failed():
                self.remove_processing(process_id)
            else:
                return True

    return bool(self._background_process_ids)
def create_new_version_for(cls, instance, data, user, create_parent_version=True):
    instance.concept_class = data.get('concept_class', instance.concept_class)
    instance.datatype = data.get('datatype', instance.datatype)
    instance.extras = data.get('extras', instance.extras)
    instance.external_id = data.get('external_id', instance.external_id)
    instance.comment = data.get('update_comment') or data.get('comment')
    instance.retired = data.get('retired', instance.retired)

    new_names = LocalizedText.build_locales(data.get('names', []))
    new_descriptions = LocalizedText.build_locales(data.get('descriptions', []), 'description')
    has_parent_concept_uris_attr = 'parent_concept_urls' in data
    parent_concept_uris = data.pop('parent_concept_urls', None)

    instance.cloned_names = compact(new_names)
    instance.cloned_descriptions = compact(new_descriptions)

    if not parent_concept_uris and has_parent_concept_uris_attr:
        parent_concept_uris = []

    return cls.persist_clone(instance, user, create_parent_version, parent_concept_uris)
def queue_tasks(self, part_list, is_child):
    chunked_lists = compact(
        self.chunker_list(part_list, self.parallel) if is_child else [part_list])
    jobs = group(
        bulk_import_parts_inline.s(_list, self.username, self.update_if_exists)
        for _list in chunked_lists)
    group_result = jobs.apply_async(queue='concurrent')
    self.groups.append(group_result)
    self.tasks += group_result.results
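# The real chunker_list is a method on the importer class and is not shown
# here; this is only a hedged sketch of the contract assumed above (split a
# list into `n` roughly even chunks, where surplus chunks come back empty and
# are removed by compact before tasks are queued).
from pydash import compact

def chunker_list(seq, n):
    # round-robin split into n sublists
    return [seq[i::n] for i in range(n)]

assert compact(chunker_list([1, 2, 3], 5)) == [[1], [2], [3]]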
def persist_clone(cls, obj, user=None, **kwargs):
    errors = dict()
    if not user:
        errors['version_created_by'] = PERSIST_CLONE_SPECIFY_USER_ERROR
        return errors

    obj.version = obj.version or generate_temp_version()
    obj.created_by = user
    obj.updated_by = user
    parent = obj.parent
    parent_head = parent.head
    persisted = False
    prev_latest_version = None
    try:
        with transaction.atomic():
            cls.pause_indexing()

            obj.is_latest_version = True
            obj.save(**kwargs)
            if obj.id:
                obj.version = str(obj.id)
                obj.save()
                obj.update_versioned_object()
                versioned_object = obj.versioned_object
                prev_latest_version = versioned_object.versions.exclude(id=obj.id).filter(
                    is_latest_version=True).first()
                if prev_latest_version:
                    prev_latest_version.is_latest_version = False
                    prev_latest_version.save()
                obj.sources.set(compact([parent, parent_head]))
                persisted = True
                cls.resume_indexing()

                def index_all():
                    if prev_latest_version:
                        prev_latest_version.index()
                    obj.index()

                transaction.on_commit(index_all)
    except ValidationError as err:
        errors.update(err.message_dict)
    finally:
        cls.resume_indexing()
        if not persisted:
            if obj.id:
                obj.sources.remove(parent_head)
                if prev_latest_version:
                    prev_latest_version.is_latest_version = True
                    prev_latest_version.save()
                obj.delete()
            errors['non_field_errors'] = [PERSIST_CLONE_ERROR]

    return errors
def update_mappings(self):
    from core.mappings.models import Mapping

    parent_uris = compact([self.parent.uri, self.parent.canonical_url])

    for mapping in Mapping.objects.filter(
            to_concept_code=self.mnemonic, to_source_url__in=parent_uris, to_concept__isnull=True):
        mapping.to_concept = self
        mapping.save()

    for mapping in Mapping.objects.filter(
            from_concept_code=self.mnemonic, from_source_url__in=parent_uris, from_concept__isnull=True):
        mapping.from_concept = self
        mapping.save()
def run(self) -> (dict, List[dict]):
    """
    Get the hardware information.

    This method returns *almost* DeviceHub ready information in a tuple,
    where the first element is information related to the overall machine,
    like the S/N of the computer, and the second item is a list of hardware
    information per component.
    """
    computer = self.computer()
    components = chain(self.processors(), self.ram_modules(), self.hard_drives(),
                       self.graphic_cards(), [self.motherboard()],
                       self.network_adapters(), self.sound_cards())
    return computer, compact(components)
def self_desc(cls, omit=None):
    '''Method to get self description, used at init.'''
    desc_list = [f'{get_class_name(cls)}:']
    omit_list = ps.compact(cast_list(omit))
    for k, v in get_class_attr(cls).items():
        if k in omit_list:
            continue
        if k == 'spec':  # spec components are described at their object level; for session, only desc spec.meta
            desc_v = pformat(v['meta'])
        elif ps.is_dict(v) or ps.is_dict(ps.head(v)):
            desc_v = pformat(v)
        else:
            desc_v = v
        desc_list.append(f'- {k} = {desc_v}')
    desc = '\n'.join(desc_list)
    return desc
def monthly_chart_data(queryset: QuerySet) -> Dict:
    series_queryset = EventType.objects.filter(
        events__in=queryset).distinct().order_by("name").values("name", "colour")
    series_dict = {
        item["name"]: {
            "name": item["name"],
            "color": item["colour"],
            "data": []
        }
        for item in series_queryset
    }
    chart_data: Dict[str, Any] = {"categories": [], "series": []}

    data_queryset = (queryset.annotate(
        datetime=TruncMonth("start_datetime")).order_by(
            "datetime", "type__name").values(
                "datetime", "type__name").distinct().annotate(count=Count("id")))
    data_dict = [{
        **item, "month": item["datetime"].strftime("%Y-%m")
    } for item in data_queryset]
    structured_data_dict = pydash.group_by(data_dict, "month")
    for month, data in structured_data_dict.items():
        structured_data_dict[month] = pydash.group_by(data, "type__name")

    chart_data["categories"] = list(structured_data_dict.keys())
    for category in chart_data["categories"]:
        event_types = structured_data_dict[category].keys()
        for event_type, serie in series_dict.items():
            if event_type in event_types:
                series_dict[event_type]["data"].append(
                    structured_data_dict[category][event_type][0]["count"])
            else:
                series_dict[event_type]["data"].append(None)
    chart_data["series"] = list(series_dict.values())

    average_data = []
    for index in range(len(chart_data["categories"])):
        truly_values = pydash.compact(
            [serie["data"][index] for serie in chart_data["series"]])
        average_data.append(sum(truly_values))
    chart_data["average"] = mean(average_data) if len(average_data) else None

    return chart_data
def prepath_to_idxs(prepath):
    '''Extract trial index and session index from prepath if available'''
    _, _, prename, spec_name, _, _ = prepath_split(prepath)
    idxs_tail = prename.replace(spec_name, '').strip('_')
    idxs_strs = ps.compact(idxs_tail.split('_')[:2])
    if ps.is_empty(idxs_strs):
        return None, None
    tidx = idxs_strs[0]
    assert tidx.startswith('t')
    trial_index = int(tidx.strip('t'))
    if len(idxs_strs) == 1:  # trial only, no session
        session_index = None
    else:  # has session
        sidx = idxs_strs[1]
        assert sidx.startswith('s')
        session_index = int(sidx.strip('s'))
    return trial_index, session_index
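# Hedged illustration of the tail parsing above with a hypothetical prename of
# the assumed shape '<spec_name>_t<trial>_s<session>'; prepath_split itself is
# not reproduced here.
import pydash as ps

prename, spec_name = 'dqn_cartpole_t1_s0', 'dqn_cartpole'
idxs_strs = ps.compact(prename.replace(spec_name, '').strip('_').split('_')[:2])
assert idxs_strs == ['t1', 's0']  # -> trial_index=1, session_index=0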
def persist_clone(cls, obj, user=None, **kwargs):
    errors = dict()
    if not user:
        errors['version_created_by'] = 'Must specify which user is attempting to create a new concept version.'
        return errors

    obj.created_by = user
    obj.version = TEMP
    parent = obj.parent
    parent_head = parent.head
    persisted = False
    errored_action = 'saving new concept version'
    latest_versions = None
    try:
        obj.is_latest_version = True
        obj.save(**kwargs)
        obj.version = str(obj.id)
        obj.save()
        obj.set_locales()
        obj.clean()  # clean here to validate locales that can only be saved after obj is saved
        latest_versions = obj.versions.exclude(id=obj.id).filter(is_latest_version=True)
        latest_versions.update(is_latest_version=False)
        obj.sources.set(compact([parent, parent_head]))

        # to update counts
        parent.save()
        parent_head.save()

        persisted = True
    except ValidationError as err:
        errors.update(err.message_dict)
    finally:
        if not persisted:
            obj.remove_locales()
            obj.sources.remove(parent_head)
            if latest_versions:
                latest_versions.update(is_latest_version=True)
            if obj.id:
                obj.delete()
            errors['non_field_errors'] = ['An error occurred while %s.' % errored_action]

    return errors
def clean(text):
    """Trim and replace multiple spaces with a single space.

    Args:
        text (str): String to clean.

    Returns:
        str: Cleaned string.

    Example:

        >>> clean('a  b   c    d')
        'a b c d'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return ' '.join(pyd.compact(text.split()))
def clean(text):
    """Trim and replace multiple spaces with a single space.

    Args:
        text (str): String to clean.

    Returns:
        str: Cleaned string.

    Example:

        >>> clean("a  b   c    d")
        'a b c d'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return " ".join(pyd.compact(text.split()))
def persist_clone(cls, obj, user=None, **kwargs):
    errors = dict()
    if not user:
        errors['version_created_by'] = "Must specify which user is attempting to create a new {} version.".format(
            cls.get_resource_url_kwarg())
        return errors

    obj.version = TEMP
    obj.created_by = user
    parent = obj.parent
    parent_head = parent.head
    persisted = False
    errored_action = 'saving new mapping version'
    latest_versions = None
    try:
        obj.is_latest_version = True
        obj.full_clean()
        obj.save(**kwargs)
        obj.version = str(obj.id)
        obj.save()
        latest_versions = obj.versions.exclude(id=obj.id).filter(is_latest_version=True)
        latest_versions.update(is_latest_version=False)
        obj.sources.set(compact([parent, parent_head]))

        # to update counts
        parent.save()
        parent_head.save()

        persisted = True
    except ValidationError as err:
        errors.update(err.message_dict)
    finally:
        if not persisted:
            obj.sources.remove(parent_head)
            if latest_versions:
                latest_versions.update(is_latest_version=True)
            if obj.id:
                obj.delete()
            errors['non_field_errors'] = ['An error occurred while %s.' % errored_action]

    return errors
def number_format(number, scale=0, decimal_separator=".", order_separator=","): """Format a number to scale with custom decimal and order separators. Args: number (int|float): Number to format. scale (int, optional): Number of decimals to include. Defaults to ``0``. decimal_separator (str, optional): Decimal separator to use. Defaults to ``'.'``. order_separator (str, optional): Order separator to use. Defaults to ``','``. Returns: str: Formatted number as string. Example: >>> number_format(1234.5678) '1,235' >>> number_format(1234.5678, 2, ',', '.') '1.234,57' .. versionadded:: 3.0.0 """ # Create a string formatter which converts number to the appropriately # scaled representation. fmt = "{{0:.{0:d}f}}".format(scale) try: num_parts = fmt.format(number).split(".") except ValueError: text = "" else: int_part = num_parts[0] dec_part = (num_parts + [""])[1] # Reverse the integer part, chop it into groups of 3, join on # `order_separator`, and then unreverse the string. int_part = order_separator.join(chop(int_part[::-1], 3))[::-1] text = decimal_separator.join(pyd.compact([int_part, dec_part])) return text
def number_format(number, scale=0, decimal_separator='.', order_separator=','):
    """Format a number to scale with custom decimal and order separators.

    Args:
        number (int|float): Number to format.
        scale (int, optional): Number of decimals to include. Defaults to ``0``.
        decimal_separator (str, optional): Decimal separator to use. Defaults to ``'.'``.
        order_separator (str, optional): Order separator to use. Defaults to ``','``.

    Returns:
        str: Formatted number as string.

    Example:

        >>> number_format(1234.5678)
        '1,235'
        >>> number_format(1234.5678, 2, ',', '.')
        '1.234,57'

    .. versionadded:: 3.0.0
    """
    # Create a string formatter which converts number to the appropriately
    # scaled representation.
    fmt = '{{0:.{0:d}f}}'.format(scale)

    try:
        num_parts = fmt.format(number).split('.')
    except ValueError:
        text = ''
    else:
        int_part = num_parts[0]
        dec_part = (num_parts + [''])[1]

        # Reverse the integer part, chop it into groups of 3, join on
        # `order_separator`, and then unreverse the string.
        int_part = order_separator.join(chop(int_part[::-1], 3))[::-1]
        text = decimal_separator.join(pyd.compact([int_part, dec_part]))

    return text
def predict(self, text, original_text, pos):
    if not isinstance(text, str):
        text = str(text, "utf-8")
    doc = self.model(text)
    logger.info("english predict")

    def default_entity_mapping(entity):
        return {
            "tag": entity.label_,
            "entity": " ".join(original_text.split()[entity.start:entity.end]),
            "start": entity.start,
            "end": entity.end,
            "resolvedTo": {
                'baseEntity': " ".join(original_text.split()[entity.start:entity.end])
            }
        }

    default_entities = compact(list(map(default_entity_mapping, doc.ents)))

    pos_mapping = []
    if pos is not None:
        def default_pos_mapping(word):
            original_text_tokens = original_text.split()
            text_tokens = text.split()
            word_text = word.text
            word_index = text_tokens.index(word_text)
            return {
                "text": original_text_tokens[word_index] if word_index < len(original_text_tokens) else word_text,
                "lemma": word.lemma_,
                "tag": word.tag_,
                "pos": word.pos_
            }

        pos_mapping = list(map(default_pos_mapping, doc))

    return default_entities, pos_mapping
def test_compact(case, expected):
    assert _.compact(case) == expected
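# Hedged sketch of a parametrization that would drive the test above; the
# decorator and the cases are assumptions, not the library's own fixtures.
# pydash.compact removes every falsy value (None, False, 0, '', [], ...).
import pytest
import pydash as _

@pytest.mark.parametrize('case,expected', [
    ([0, 1, False, 2, '', 3, None], [1, 2, 3]),
    ([], []),
])
def test_compact_examples(case, expected):
    assert _.compact(case) == expected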