Example #1
0
    # When the rm3 flag is set, record 'rm3' in the list of ranker names and
    # call set_rm3() on the searcher.
    if args.rm3:
        search_rankers.append('rm3')
        searcher.set_rm3()

    # An empty `topics` mapping is treated as an unknown topics name: report
    # it and terminate.
    if topics == {}:
        print(f'Topic {args.topics} Not Found')
        exit()

    # Decide whether to build the pseudo-relevance classifier re-ranker.
    # NOTE: this is an `and` chain, so `use_prcl` is the first falsy operand
    # (e.g. None or an empty list for args.prcl) or else the result of
    # `args.alpha > 0`. It is therefore not always a bool, which is why the
    # checks below compare with `is True` rather than relying on truthiness.
    use_prcl = args.prcl and len(args.prcl) > 0 and args.alpha > 0
    if use_prcl is True:
        # `ranker` is bound only on this branch.
        ranker = PseudoRelevanceClassifierReranker(searcher.index_dir,
                                                   args.vectorizer,
                                                   args.prcl,
                                                   r=args.r,
                                                   n=args.n,
                                                   alpha=args.alpha)

    # Build the output path: start from the user-supplied value and derive
    # the pieces of a default name only when none was given.
    output_path = args.output
    if output_path is None:
        if use_prcl is True:
            # Short names of the requested classifiers, in the order given.
            # Only LR and SVM are mapped; any other type is skipped.
            clf_rankers = []
            for t in args.prcl:
                if t == ClassifierType.LR:
                    clf_rankers.append('lr')
                elif t == ClassifierType.SVM:
                    clf_rankers.append('svm')

            # Name fragment that encodes the `r` setting.
            r_str = f'prcl.r_{args.r}'
Example #2
0
        print(f'Using whitespace analyzer because of pretokenized topics')
        tokenizer = AutoTokenizer.from_pretrained(args.tokenizer)
        print(f'Using {args.tokenizer} to preprocess topics')

    # When custom stopwords are supplied, build an analyzer from them and
    # install it on the searcher.
    if args.stopwords:
        # NOTE(review): the meaning of the positional arguments 'porter' and
        # False is not visible here -- confirm against fromArguments.
        analyzer = JDefaultEnglishAnalyzer.fromArguments(
            'porter', False, args.stopwords)
        searcher.set_analyzer(analyzer)
        print(f'Using custom stopwords={args.stopwords}')

    # Decide whether to build the pseudo-relevance classifier re-ranker.
    # NOTE: this is an `and` chain, so `use_prcl` is the first falsy operand
    # (e.g. None or an empty list for args.prcl) or else the result of
    # `args.alpha > 0`. It is therefore not always a bool, which is why the
    # checks below compare with `is True` rather than relying on truthiness.
    use_prcl = args.prcl and len(args.prcl) > 0 and args.alpha > 0
    if use_prcl is True:
        # `ranker` is bound only on this branch.
        ranker = PseudoRelevanceClassifierReranker(searcher.index_dir,
                                                   args.vectorizer,
                                                   args.prcl,
                                                   r=args.r,
                                                   n=args.n,
                                                   alpha=args.alpha)

    # Build the output path: start from the user-supplied value and derive
    # the pieces of a default name only when none was given.
    output_path = args.output
    if output_path is None:
        if use_prcl is True:
            # Short names of the requested classifiers, in the order given.
            # Only LR and SVM are mapped; any other type is skipped.
            clf_rankers = []
            for t in args.prcl:
                if t == ClassifierType.LR:
                    clf_rankers.append('lr')
                elif t == ClassifierType.SVM:
                    clf_rankers.append('svm')

            # Name fragment that encodes the `r` setting.
            r_str = f'prcl.r_{args.r}'