Python ItemSelector 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: transformers.item_selector

클래스/타입: ItemSelector

hotexamples.com에서의 예제들: 5

Python ItemSelector 예제 5개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python transformers.item_selector.ItemSelector의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

ItemSelector(5)

자주 사용되는 메소드들

ItemSelector (5)

예제 #1

파일 보기

    def __init__(self, threshold=0.5, modelpath=''):
        """Store the settings, optionally load a booster, and build the feature union.

        Args:
            threshold: Stored unchanged on ``self.threshold``.
            modelpath: Stored on ``self.modelpath``. When truthy, an
                ``xgb.Booster`` is created and ``load_model(modelpath)`` is
                called on it; otherwise ``self.model`` stays ``None``.
        """
        self.threshold, self.modelpath = threshold, modelpath

        self.model = xgb.Booster() if self.modelpath else None
        if self.model is not None:
            self.model.load_model(modelpath)

        # A single extractor instance is reused as the 'ctx' step of every branch.
        morph_extractor = MorphologyExtractor(sparse=True)

        def token_branch(step_name, column, char_width):
            # One union entry: select `column`, then feed it to a nested union
            # of StringToChar(char_width) and the shared morphology extractor.
            selector = ItemSelector(column)
            features = SparseUnion([
                ('char', StringToChar(char_width, to_coo=True)),
                ('ctx', morph_extractor),
            ])
            return step_name, Pipeline([
                ('select', selector),
                ('features', features),
            ])

        self.pipeline = SparseUnion([
            token_branch('orig', 'before', 10),
            token_branch('prev', 'prev', 5),
            token_branch('next', 'next', 5),
        ])

예제 #2

파일 보기

파일: add_case_transformer.py 프로젝트: rumbok/kaggle_google_text_norm_ru

    def __init__(self, modelpath=''):
        """Optionally load a booster and assemble the feature pipeline.

        Args:
            modelpath: Stored on ``self.modelpath``. When truthy, an
                ``xgb.Booster`` is created and ``load_model(modelpath)`` is
                called on it; otherwise ``self.model`` stays ``None``.

        Besides ``self.model`` this sets ``self.class_type`` (a categorical
        dtype with 17 labels), ``self.pipeline`` (a ``SparseUnion`` with one
        'class' branch and five column branches) and ``self.case_extractor``.
        """
        self.modelpath = modelpath

        self.model = xgb.Booster() if self.modelpath else None
        if self.model is not None:
            self.model.load_model(modelpath)

        class_labels = [
            'PLAIN', 'DATE', 'PUNCT', 'ORDINAL', 'VERBATIM', 'LETTERS',
            'CARDINAL', 'MEASURE', 'TELEPHONE', 'ELECTRONIC', 'DECIMAL',
            'DIGIT', 'FRACTION', 'MONEY', 'TIME', 'TRANS', 'DASH',
        ]
        self.class_type = CategoricalDtype(categories=class_labels)

        # A single extractor instance is reused as the 'ctx' step of every branch.
        morph_extractor = MorphologyExtractor(sparse=True, multi_words=True)

        def token_branch(step_name, column, char_width):
            # One union entry: select `column`, then feed it to a nested union
            # of StringToChar(char_width) and the shared morphology extractor.
            selector = ItemSelector(column)
            features = SparseUnion([
                ('char', StringToChar(char_width, to_coo=True)),
                ('ctx', morph_extractor),
            ])
            return step_name, Pipeline([
                ('select', selector),
                ('features', features),
            ])

        # NOTE(review): `n_values` was deprecated in scikit-learn 0.20 and
        # removed in 0.22 -- confirm the scikit-learn version this runs on.
        class_branch = ('class', Pipeline([
            ('select', ItemSelector('class')),
            ('codes', ToCategoryCodes(self.class_type)),
            ('reshape', Reshape2d()),
            ('onehot', OneHotEncoder(n_values=len(self.class_type.categories),
                                     sparse=True,
                                     dtype=np.uint8)),
        ]))

        # (step name, selected column, StringToChar size) in union order.
        context_specs = (
            ('orig', 'before', 10),
            ('prev_prev', 'prev_prev', -5),
            ('prev', 'prev', -5),
            ('next', 'next', -5),
            ('next_next', 'next_next', -5),
        )
        branches = [class_branch]
        for spec in context_specs:
            branches.append(token_branch(*spec))
        self.pipeline = SparseUnion(branches)

        self.case_extractor = CaseExtractor(multi_words=True)

예제 #3

파일 보기

파일: add_class_transformer.py 프로젝트: rumbok/kaggle_google_text_norm_ru

    def __init__(self, modelpath=''):
        """Optionally load a booster and assemble the three-branch feature union.

        Args:
            modelpath: Stored on ``self.modelpath``. When truthy, an
                ``xgb.Booster`` is created and ``load_model(modelpath)`` is
                called on it; otherwise ``self.model`` stays ``None``.

        Also sets ``self.class_type`` (a categorical dtype with 17 labels) and
        ``self.pipeline``.
        """
        self.modelpath = modelpath

        self.model = xgb.Booster() if self.modelpath else None
        if self.model is not None:
            self.model.load_model(modelpath)

        class_labels = [
            'PLAIN', 'DATE', 'PUNCT', 'ORDINAL', 'VERBATIM', 'LETTERS',
            'CARDINAL', 'MEASURE', 'TELEPHONE', 'ELECTRONIC', 'DECIMAL',
            'DIGIT', 'FRACTION', 'MONEY', 'TIME', 'TRANS', 'DASH',
        ]
        self.class_type = CategoricalDtype(categories=class_labels)

        # A single extractor instance is reused as the 'ctx' step of every branch.
        morph_extractor = MorphologyExtractor(sparse=True)

        # (step name, selected column, StringToChar size) in union order.
        branch_specs = [
            ('orig', 'before', 10),
            ('prev', 'prev', 5),
            ('next', 'next', 5),
        ]
        self.pipeline = SparseUnion([
            (step_name, Pipeline([
                ('select', ItemSelector(column)),
                ('features', SparseUnion([
                    ('char', StringToChar(char_width, to_coo=True)),
                    ('ctx', morph_extractor),
                ])),
            ]))
            for step_name, column, char_width in branch_specs
        ])

예제 #4

파일 보기

# Class labels accepted by the row filter below.
classes = frozenset([
    'CARDINAL', 'DATE', 'MEASURE', 'DECIMAL', 'MONEY', 'ORDINAL', 'FRACTION',
    'TIME'
])
# Keep only rows where 'before' differs from 'after' AND 'class' is one of
# `classes`. `df` is created outside this excerpt.
df = df[~(df['before'] == df['after']) & (df['class'].isin(classes))]
# Categorical dtype with 17 labels; handed to ToCategoryCodes and used to size
# the one-hot encoder in the pipeline below.
class_type = CategoricalDtype(categories=[
    'PLAIN', 'DATE', 'PUNCT', 'ORDINAL', 'VERBATIM', 'LETTERS', 'CARDINAL',
    'MEASURE', 'TELEPHONE', 'ELECTRONIC', 'DECIMAL', 'DIGIT', 'FRACTION',
    'MONEY', 'TIME', 'TRANS', 'DASH'
])
# NOTE(review): assuming `df` is a pandas DataFrame, info() prints its summary
# itself and returns None, so this line additionally prints "None" -- confirm
# whether a bare `df.info()` was intended.
print(df.info())

# Extractor instance used as the 'ctx' step in the pipeline below.
morph_extractor = MorphologyExtractor(sparse=True, multi_words=True)
before_pipeline = SparseUnion([
    ('class',
     Pipeline([('select', ItemSelector('class')),
               ('codes', ToCategoryCodes(class_type)),
               ('reshape', Reshape2d()),
               ('onehot',
                OneHotEncoder(n_values=len(class_type.categories),
                              sparse=True,
                              dtype=np.uint8))])),
    ('orig',
     Pipeline([
         ('select', ItemSelector('before')),
         ('features',
          SparseUnion([
              ('char', StringToChar(10, to_coo=True)),
              ('ctx', morph_extractor),
          ])),
     ])),

예제 #5

파일 보기

파일: self_model_trainer.py 프로젝트: rumbok/kaggle_google_text_norm_ru

import gc
from sklearn.metrics import accuracy_score

# Load the 'before' and 'after' columns (load_train is defined outside this
# excerpt) and replace missing values with empty strings.
df = load_train(['before', 'after']).fillna('')
# Flag rows where 'before' and 'after' are equal.
df['self'] = (df['before'] == df['after'])
# Neighbouring 'before' values: shifted by +1 for 'prev' and by -1 for 'next'.
df['prev'] = df['before'].shift(1)
df['next'] = df['before'].shift(-1)
# Presumably fills the gaps shift() leaves at the first/last row -- assumes
# pandas semantics; TODO confirm.
df = df.fillna('')
# 'after' is dropped once the 'self' flag has been derived from it.
del df['after']
# NOTE(review): assuming `df` is a pandas DataFrame, info() prints its summary
# itself and returns None, so this line additionally prints "None" -- confirm
# whether a bare `df.info()` was intended.
print(df.info())

# Extractor instance used as the 'ctx' step of each branch in the pipeline below.
morph_extractor = MorphologyExtractor(sparse=True)
pipeline = SparseUnion([
    ('orig',
     Pipeline([
         ('select', ItemSelector('before')),
         ('features',
          SparseUnion([
              ('char', StringToChar(10, to_coo=True)),
              ('ctx', morph_extractor),
          ])),
     ])),
    ('prev',
     Pipeline([
         ('select', ItemSelector('prev')),
         ('features',
          SparseUnion([
              ('char', StringToChar(5, to_coo=True)),
              ('ctx', morph_extractor),
          ])),
     ])),