예제 #1
0
def multinomial(path, s3_path, class_name, label, **kwargs):
    """
    Load a pickled multinomial model plus its vectorizer and wrap them in a
    Bayes classifier object.

    Parameters
    ----------
    path : dict
        ``path['multinomial']`` must provide the keys 'model' and 'vector'
        (opened as binary files and unpickled) and 'bpe' (passed to
        ``load_yttm``).
    s3_path : dict
        ``s3_path['multinomial']`` is passed to ``check_file`` together with
        ``path['multinomial']``.
    class_name : str
        Only used to build the cache key shown in the error message.
    label : sized collection
        More than two entries selects ``MULTICLASS_BAYES``, otherwise
        ``BINARY_BAYES``.
    **kwargs
        Forwarded unchanged to ``check_file``.

    Returns
    -------
    result : object returned by calling MULTICLASS_BAYES or BINARY_BAYES

    Raises
    ------
    Exception
        If the model or vectorizer file cannot be opened or unpickled.
    """
    check_file(path['multinomial'], s3_path['multinomial'], **kwargs)
    # NOTE(review): pickle.load can execute arbitrary code; presumably these
    # files are fetched by check_file from a trusted location -- confirm.
    try:
        with open(path['multinomial']['model'], 'rb') as fopen:
            multinomial = pickle.load(fopen)
        with open(path['multinomial']['vector'], 'rb') as fopen:
            vectorize = pickle.load(fopen)
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # propagate untouched instead of being reported as a corrupted model.
        raise Exception(
            f"model corrupted due to some reasons, please run malaya.clear_cache('{class_name}/multinomial') and try again"
        ) from exc
    bpe, subword_mode = load_yttm(path['multinomial']['bpe'])

    from malaya.stem import _classification_textcleaning_stemmer, naive

    stemmer = naive()
    cleaning = partial(_classification_textcleaning_stemmer, stemmer=stemmer)

    if len(label) > 2:
        selected_class = MULTICLASS_BAYES
    else:
        selected_class = BINARY_BAYES
    return selected_class(
        multinomial=multinomial,
        label=label,
        vectorize=vectorize,
        bpe=bpe,
        subword_mode=subword_mode,
        cleaning=cleaning,
    )
예제 #2
0
def multinomial(path, s3_path, class_name, label, sigmoid=False, **kwargs):
    """
    Load a pickled multinomial model plus its vectorizer and wrap them in a
    Bayes classifier object.

    Parameters
    ----------
    path : dict
        ``path['multinomial']`` must provide the keys 'model' and 'vector'
        (opened as binary files and unpickled) and 'bpe' (passed to
        ``load_yttm``).
    s3_path : dict
        ``s3_path['multinomial']`` is passed to ``check_file`` together with
        ``path['multinomial']``.
    class_name : str
        Only used to build the cache key shown in the error message.
    label : sized collection
        When ``sigmoid`` is false, more than two entries selects
        ``MulticlassBayes``, otherwise ``BinaryBayes``.
    sigmoid : bool, optional (default=False)
        If true, ``MultilabelBayes`` is selected regardless of ``label``.
    **kwargs
        Forwarded unchanged to ``check_file``.

    Returns
    -------
    result : object returned by calling the selected Bayes class

    Raises
    ------
    Exception
        If the model or vectorizer file cannot be opened or unpickled.
    """
    check_file(path['multinomial'], s3_path['multinomial'], **kwargs)
    # NOTE(review): pickle.load can execute arbitrary code; presumably these
    # files are fetched by check_file from a trusted location -- confirm.
    try:
        with open(path['multinomial']['model'], 'rb') as fopen:
            multinomial = pickle.load(fopen)
        with open(path['multinomial']['vector'], 'rb') as fopen:
            vectorize = pickle.load(fopen)
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # propagate untouched instead of being reported as a corrupted model.
        raise Exception(
            f"model corrupted due to some reasons, please run `malaya.clear_cache('{class_name}/multinomial')` and try again"
        ) from exc
    bpe, subword_mode = load_yttm(path['multinomial']['bpe'])

    stemmer = naive()
    cleaning = partial(_classification_textcleaning_stemmer, stemmer=stemmer)

    if sigmoid:
        selected_class = MultilabelBayes
    elif len(label) > 2:
        selected_class = MulticlassBayes
    else:
        selected_class = BinaryBayes
    return selected_class(
        multinomial=multinomial,
        label=label,
        vectorize=vectorize,
        bpe=bpe,
        subword_mode=subword_mode,
        cleaning=cleaning,
    )
예제 #3
0
def multinomial(path, s3_path, module, label, sigmoid=False, **kwargs):
    """
    Load a pickled multinomial model plus its vectorizer and wrap them in a
    Bayes classifier object.

    Parameters
    ----------
    path : dict
        ``path['multinomial']`` is passed to ``check_file``; the value
        ``check_file`` returns is then indexed with 'model', 'vector' and
        'bpe'.
    s3_path : dict
        ``s3_path['multinomial']`` is passed to ``check_file``.
    module : str
        Only used to build the cache key shown in the error message.
    label : sized collection
        When ``sigmoid`` is false, more than two entries selects
        ``MulticlassBayes``, otherwise ``BinaryBayes``.
    sigmoid : bool, optional (default=False)
        If true, ``MultilabelBayes`` is selected regardless of ``label``.
    **kwargs
        Forwarded unchanged to ``check_file``.

    Returns
    -------
    result : object returned by calling the selected Bayes class

    Raises
    ------
    Exception
        If the model or vectorizer file cannot be opened or unpickled.
    """
    path = check_file(path['multinomial'], s3_path['multinomial'], **kwargs)
    # NOTE(review): pickle.load can execute arbitrary code; presumably these
    # files are fetched by check_file from a trusted location -- confirm.
    try:
        with open(path['model'], 'rb') as fopen:
            multinomial = pickle.load(fopen)
        with open(path['vector'], 'rb') as fopen:
            vectorize = pickle.load(fopen)
    except Exception as exc:
        # Exception rather than BaseException: KeyboardInterrupt/SystemExit
        # must propagate instead of being reported as a corrupted model.
        # A separate local is used so `path` keeps pointing at the file dict.
        cache_key = os.path.normpath(f'{module}/multinomial')
        raise Exception(
            f"model corrupted due to some reasons, please run `malaya.clear_cache('{cache_key}')` and try again"
        ) from exc

    bpe = YTTMEncoder(vocab_file=path['bpe'])

    stemmer = naive()
    cleaning = partial(_classification_textcleaning_stemmer, stemmer=stemmer)

    if sigmoid:
        selected_model = MultilabelBayes
    elif len(label) > 2:
        selected_model = MulticlassBayes
    else:
        selected_model = BinaryBayes

    return selected_model(
        multinomial=multinomial,
        label=label,
        vectorize=vectorize,
        bpe=bpe,
        cleaning=cleaning,
    )
예제 #4
0
파일: toxicity.py 프로젝트: tx-qi/malaya
def multinomial(**kwargs):
    """
    Load multinomial toxicity model.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``check_file``.

    Returns
    -------
    result : malaya.model.ml.MULTILABEL_BAYES class

    Raises
    ------
    Exception
        If the model or vectorizer file cannot be opened or unpickled.
    """
    import pickle

    check_file(PATH_TOXIC['multinomial'], S3_PATH_TOXIC['multinomial'],
               **kwargs)

    # NOTE(review): pickle.load can execute arbitrary code; presumably these
    # files are fetched by check_file from a trusted location -- confirm.
    try:
        with open(PATH_TOXIC['multinomial']['model'], 'rb') as fopen:
            multinomial = pickle.load(fopen)
        with open(PATH_TOXIC['multinomial']['vector'], 'rb') as fopen:
            vectorize = pickle.load(fopen)
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # propagate untouched instead of being reported as a corrupted model.
        raise Exception(
            "model corrupted due to some reasons, please run malaya.clear_cache('toxic/multinomial') and try again"
        ) from exc

    from malaya.text.bpe import load_yttm
    from malaya.stem import _classification_textcleaning_stemmer, naive

    stemmer = naive()
    cleaning = partial(_classification_textcleaning_stemmer, stemmer=stemmer)

    bpe, subword_mode = load_yttm(PATH_TOXIC['multinomial']['bpe'])

    # NOTE(review): `label` is not defined in this function; presumably a
    # module-level name in toxicity.py -- confirm it exists.
    return MULTILABEL_BAYES(
        multinomial=multinomial,
        label=label,
        vectorize=vectorize,
        bpe=bpe,
        subword_mode=subword_mode,
        cleaning=cleaning,
    )