コード例 #1
0
def create_word_bucketer_from_profile(bucket_type,
                                      freq_counts=None,
                                      freq_count_file=None,
                                      freq_corpus_file=None,
                                      freq_data=None,
                                      label_set=None,
                                      bucket_cutoffs=None,
                                      case_insensitive=False):
    """Build the word bucketer selected by ``bucket_type``.

    Args:
        bucket_type: One of ``'freq'``, ``'case'``, ``'label'`` or
            ``'numlabel'``.
        freq_counts: Forwarded to ``FreqWordBucketer`` (``'freq'`` only).
        freq_count_file: Forwarded to ``FreqWordBucketer`` (``'freq'`` only).
        freq_corpus_file: Forwarded to ``FreqWordBucketer`` (``'freq'`` only).
        freq_data: Forwarded to ``FreqWordBucketer`` (``'freq'`` only).
        label_set: Forwarded to ``LabelWordBucketer`` (``'label'`` only).
        bucket_cutoffs: Either an already-parsed value, or a colon-separated
            string such as ``"1:2:5"``. A string is split on ``':'`` and
            each piece is converted with ``arg_utils.parse_intfloat``. Used
            by the ``'freq'`` and ``'numlabel'`` bucketers.
        case_insensitive: Forwarded to ``FreqWordBucketer`` (``'freq'`` only).

    Returns:
        A newly constructed bucketer instance for the requested type.

    Raises:
        ValueError: If ``bucket_type`` is not one of the supported names.
    """
    # isinstance (rather than an exact type comparison) so that str
    # subclasses are parsed as well instead of being passed through raw.
    if isinstance(bucket_cutoffs, str):
        bucket_cutoffs = [
            arg_utils.parse_intfloat(x) for x in bucket_cutoffs.split(':')
        ]
    if bucket_type == 'freq':
        return FreqWordBucketer(freq_counts=freq_counts,
                                freq_count_file=freq_count_file,
                                freq_corpus_file=freq_corpus_file,
                                freq_data=freq_data,
                                bucket_cutoffs=bucket_cutoffs,
                                case_insensitive=case_insensitive)
    elif bucket_type == 'case':
        return CaseWordBucketer()
    elif bucket_type == 'label':
        return LabelWordBucketer(label_set=label_set)
    elif bucket_type == 'numlabel':
        return NumericalLabelWordBucketer(bucket_cutoffs=bucket_cutoffs)
    else:
        raise ValueError(f'Illegal bucket type {bucket_type}')
コード例 #2
0
def create_sentence_bucketer_from_profile(bucket_type,
                                          score_type=None,
                                          bucket_cutoffs=None,
                                          label_set=None,
                                          case_insensitive=False):
    """Build the sentence bucketer selected by ``bucket_type``.

    Args:
        bucket_type: One of ``'score'``, ``'length'``, ``'lengthdiff'``,
            ``'label'``, ``'multilabel'`` or ``'numlabel'``.
        score_type: Passed positionally to ``ScoreSentenceBucketer``
            (``'score'`` only).
        bucket_cutoffs: Either an already-parsed value, or a colon-separated
            string such as ``"1:2:5"``. A string is split on ``':'`` and
            each piece is converted with ``arg_utils.parse_intfloat``. Used
            by the ``'score'``, ``'length'``, ``'lengthdiff'`` and
            ``'numlabel'`` bucketers.
        label_set: Forwarded to the ``'label'`` and ``'multilabel'``
            bucketers.
        case_insensitive: Forwarded to ``ScoreSentenceBucketer``
            (``'score'`` only).

    Returns:
        A newly constructed bucketer instance for the requested type.

    Raises:
        NotImplementedError: If ``bucket_type`` is not one of the supported
            names.
    """
    # isinstance (rather than an exact type comparison) so that str
    # subclasses are parsed as well instead of being passed through raw.
    if isinstance(bucket_cutoffs, str):
        bucket_cutoffs = [
            arg_utils.parse_intfloat(x) for x in bucket_cutoffs.split(':')
        ]
    if bucket_type == 'score':
        return ScoreSentenceBucketer(score_type,
                                     bucket_cutoffs=bucket_cutoffs,
                                     case_insensitive=case_insensitive)
    elif bucket_type == 'length':
        return LengthSentenceBucketer(bucket_cutoffs=bucket_cutoffs)
    elif bucket_type == 'lengthdiff':
        return LengthDiffSentenceBucketer(bucket_cutoffs=bucket_cutoffs)
    elif bucket_type == 'label':
        return LabelSentenceBucketer(label_set=label_set)
    elif bucket_type == 'multilabel':
        return MultiLabelSentenceBucketer(label_set=label_set)
    elif bucket_type == 'numlabel':
        return NumericalLabelSentenceBucketer(bucket_cutoffs=bucket_cutoffs)
    else:
        # Exception type kept as NotImplementedError so existing callers'
        # except clauses continue to match.
        raise NotImplementedError(f'Illegal bucket type {bucket_type}')