Python Tagger.Tagger Exemples, tagger.Tagger.Tagger Python Exemples

Exemple #1

0

Afficher le fichier

def AIC_predict():
    """Run prediction: PIC tagging first, then AIC evaluation and tagging.

    The current ``config.iter`` value is printed before any work starts.
    """
    print("iter : ", config.iter)

    # Stage 1: apply PIC processing to the morphologically analysed
    # raw sentences.
    #   input  : config.result_input_path
    #   output : config.result_processed_path
    pic_tagger = Tagger()
    pic_tagger.taggingPIC("result_tagging")

    # Stage 2: apply AIC to the PIC-processed raw sentences.
    #   input  : config.result_processed_path
    #   output : config.result_output_path
    aic_tagger = Tagger()
    aic_tagger.evaluateAIC("result")
    aic_tagger.main_taggingAIC(mode="result_tagging")

Exemple #2

0

Afficher le fichier

    def __init__(self):
        """Load both pre-trained POS taggers, set up the CRF tagger and
        load the chunker grammar."""
        # First pre-trained POS tagger, built from the stored "tagger" object.
        first_uni, first_bi, first_tri, first_word = self.load_obj("tagger")
        self.TAGGER1 = Tagger(first_uni, first_bi, first_tri, first_word)

        # Second pre-trained POS tagger, built from the stored "tagger2" object.
        second_uni, second_bi, second_tri, second_word = self.load_obj(
            "tagger2")
        self.TAGGER2 = Tagger(second_uni, second_bi, second_tri, second_word)

        # CRF tagger backed by a model file on disk.
        self.TAGGER3 = crf_tagger = CRFTagger()
        crf_tagger.set_model_file(
            'dataset/all_indo_man_tag_corpus_model.crf.tagger')

        # Load the chunker grammar data.
        self.load_chunker()

Exemple #3

0

Afficher le fichier

Fichier : audio-cataloguer.py Projet : neoben/Tiny-Audio-Cataloguer

def main(argv):
    """Parse the command line, scan audio tags and build the catalogue.

    Args:
        argv: Command-line arguments (without the program name). The
            recognised options are ``-h``/``--help``, ``-p <path>`` and
            ``-q``.

    The process exits with status 2 when option parsing fails, and with
    status 0 after printing the usage text when help is requested or when
    no path was supplied.
    """
    try:
        opts, _ = getopt.getopt(argv, "hp:q", ["help"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    path = ""
    quiet = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif opt == '-q':
            quiet = True
        elif opt == '-p':
            path = arg
        else:
            usage()
            sys.exit(0)

    if not path:
        usage()
        sys.exit(0)

    print("Starting tag scanning...")
    tagger = Tagger(path, quiet)
    tagger.scan_audio_files()
    print("...Done!")
    # A bare `print` is only an expression on Python 3 and prints nothing;
    # print("") emits the intended blank line on both Python 2 and 3.
    print("")

    print("Starting file cataloguing...")
    dic = tagger.get_scanned_dic()
    cataloguer = Cataloguer(path, dic, quiet)
    cataloguer.create_catalogue()
    print("...Done!")
    print("")

Exemple #4

0

Afficher le fichier

Fichier : huntag.py Projet : tomfurrier/hunlp-GATE

def mainTag(featureSet, options):
    """Run the tagger in the mode selected by ``options``.

    Args:
        featureSet: Feature set handed to ``Tagger``.
        options: Mapping of run options. ``printWeights`` and ``toCRFsuite``
            must be present; ``inFeatFile`` and ``ioDirs`` are optional.

    Modes, checked in this order:
        * ``inFeatFile``   - tag a featurized file to ``outputStream``.
        * ``ioDirs``       - tag every file of ``ioDirs[0]`` into
          ``ioDirs[1]/<fileName>.tagged``.
        * ``toCRFsuite``   - convert ``inputStream`` to CRFsuite format.
        * ``printWeights`` - print the model weights to ``outputStream``.
        * otherwise        - tag ``inputStream`` to ``outputStream``.
    """
    transModel = None
    # The transition model is only needed when actually tagging.
    if not (options['printWeights'] or options['toCRFsuite']):
        print('loading transition model...', end='', file=sys.stderr, flush=True)
        transModel = TransModel.getModelFromFile(options['transModelFileName'])
        print('done', file=sys.stderr, flush=True)

    tagger = Tagger(featureSet, transModel, options)
    if 'inFeatFile' in options and options['inFeatFile']:
        # Tag a featurized file to outputStream
        for sen, comment in tagger.tagFeatures(options['inFeatFile']):
            writeSentence(sen, options['outputStream'], comment)
    elif 'ioDirs' in options and options['ioDirs']:
        # Tag all files in a directory to fileName.tagged
        for sen, fileName in tagger.tagDir(options['ioDirs'][0]):
            outPath = join(options['ioDirs'][1], '{0}.tagged'.format(fileName))
            # Close the handle after every sentence; the original left one
            # open file object behind per written sentence.
            with open(outPath, 'a', encoding='UTF-8') as outFile:
                writeSentence(sen, outFile)
    elif 'toCRFsuite' in options and options['toCRFsuite']:
        # Make CRFsuite format to outputStream for tagging
        tagger.toCRFsuite(options['inputStream'], options['outputStream'])
    elif 'printWeights' in options and options['printWeights']:
        # Print MaxEnt weights to outputStream
        tagger.printWeights(options['printWeights'], options['outputStream'])
    else:
        # Tag inputStream to outputStream
        for sen, comment in tagger.tagCorp(options['inputStream']):
            writeSentence(sen, options['outputStream'], comment)

Exemple #5

0

Afficher le fichier

Fichier : train.py Projet : sejin-P/nagisa

def _evaluation(hp, fn_model, data):
    """Score the model stored in ``fn_model`` against ``data``.

    Args:
        hp: Mapping providing ``'VOCAB'`` and ``'HYPERPARAMS'``.
        fn_model: Model parameters handed to ``Tagger`` as ``params``.
        data: Object exposing ``ws_data``, ``words`` and ``pos_data``.

    Returns:
        The pair ``(ws_f, pos_f)`` taken from
        ``mecab_system_eval.calculate_fvalues``.
    """
    tagger = Tagger(vocabs=hp['VOCAB'], params=fn_model, hp=hp['HYPERPARAMS'])

    def data_for_eval(words, postags):
        # Build [word, "word<TAB>postag"] rows; both fields are UTF-8
        # encoded when mecab_system_eval reports Python 3.
        rows = []
        for word, postag in zip(words, postags):
            joined = word + "\t" + postag
            if mecab_system_eval.PY_3 is True:
                word = word.encode("UTF-8")
                joined = joined.encode("UTF-8")
            rows.append([word, joined])
        return rows

    sys_data, ans_data = [], []
    for idx in range(len(data.ws_data)):
        gold_words = data.words[idx]
        gold_postags = [tagger.id2pos[pid] for pid in data.pos_data[idx][1]]
        ans_data.append(data_for_eval(gold_words, gold_postags))

        # Re-tag the un-segmented sentence and collect the system output.
        prediction = tagger.tagging(''.join(gold_words))
        sys_data.append(data_for_eval(prediction.words, prediction.postags))

    eval_result = mecab_system_eval.mecab_eval(sys_data, ans_data)
    _, _, ws_f, _, _, pos_f = mecab_system_eval.calculate_fvalues(eval_result)
    return ws_f, pos_f

Exemple #6

0

Afficher le fichier

def evaluation(hp, fn_model, data):
    """Write gold and predicted taggings to temp files and score them.

    Args:
        hp: Mapping providing ``'VOCAB'``, ``'HYPERPARAMS'``, ``'TMP_GOLD'``
            and ``'TMP_PRED'`` (the last two are output file paths).
        fn_model: Model parameters handed to ``Tagger`` as ``params``.
        data: Object exposing ``ws_data``, ``words`` and ``pos_data``.

    Returns:
        The pair ``(ws_f, pos_f)`` returned by ``mecab_eval``.
    """
    tagger = Tagger(vocabs=hp['VOCAB'], params=fn_model, hp=hp['HYPERPARAMS'])

    # Both files must be closed (and therefore flushed) before mecab_eval
    # reads them back by path; the original never closed them.
    with open(hp['TMP_GOLD'], 'w') as gold, open(hp['TMP_PRED'], 'w') as pred:
        for i in range(len(data.ws_data)):
            words = data.words[i]
            pids = data.pos_data[i][1]
            postags = [tagger.id2pos[pid] for pid in pids]

            for w, p in zip(words, postags):
                gold.write(w + '\t' + p + '\n')
            gold.write('EOS\n')

            output = tagger.tagging(''.join(words))
            for w, p in zip(output.words, output.postags):
                pred.write(w + '\t' + p + '\n')
            pred.write('EOS\n')

    ws_f, pos_f = mecab_eval(hp['TMP_PRED'], hp['TMP_GOLD'])
    return ws_f, pos_f

Exemple #7

0

Afficher le fichier

def diversity_sampling(feature_now, model_ver, budget):
    """Train one ``Tagger`` per feature using diversity-sampled queries.

    Args:
        feature_now: Iterable of feature indices (used to index
            ``FEATURE_SHAPE``).
        model_ver: Prefix for model files and for the CSV record
            ``records_us/<model_ver>.csv``.
        budget: Number of samples to query per episode; clamped to the
            number of available training labels.
    """
    csvfile = 'records_us/' + model_ver + '.csv'
    model_selected = []

    for feat in feature_now:
        scope = model_ver + '/feature_{0}'.format(feat)
        with tf.name_scope(scope):
            model = Tagger(model_file=scope,
                           n_input=FEATURE_SHAPE[feat][0],
                           n_steps=FEATURE_SHAPE[feat][1],
                           feature_number=feat)
        # Initial call with empty data, as in the original flow.
        model.train([], [], feature_number=feat)
        model_selected.append(model)

    train_data, test_data = data_generation(model_selected, feature_now)
    train_x_all, train_y_all = train_data[0], train_data[1]
    test_x_all, test_y_all = test_data[0], test_data[1]

    episode = 1
    print(">>>>>> Playing game ..")
    while episode <= MAX_EPISODE:
        pool_size = len(train_y_all)
        # The candidate window uses the budget *before* it is clamped.
        sample_N = min(budget * 4, pool_size)
        budget = min(budget, pool_size)

        sampler = diversitySampling(train_x_all[:, :sample_N], pool=[], budget=budget)
        sampler.updateCplus()
        queried_indexs = sampler.newind

        for pos, model in enumerate(model_selected):
            model.train(np.array(train_x_all[pos])[queried_indexs],
                        np.array(train_y_all)[queried_indexs],
                        feature_now[pos])
        print(">>>>>> Terminate ...")
        write_csv(episode, csvfile, model_selected, train_x_all, test_x_all, train_y_all, test_y_all)
        episode += 1

Exemple #8

0

Afficher le fichier

Fichier : tagger_tests.py Projet : shubhampachori12110095/information-extraction-system

    def test_match_label_IOB_applied_correctly(self):
        """After repeated matching, no "O" tag may be directly followed by
        an "I" tag on the same line."""
        tagger = Tagger()

        blank = ("test", "", "", "")
        brunel = ("Brunel", "", "", "")
        university = ("University", "", "", "")
        sentences = [[brunel, university, blank, blank],
                     [blank, blank, brunel, university],
                     [blank, blank, brunel, university]]

        input_label = "Brunel University"
        input_match_tag = "match"
        # match_label is applied three times in a row, then default tags
        # are filled in.
        tagged = tagger.match_label(sentences, input_label, input_match_tag)
        tagged = tagger.match_label(tagged, input_label, input_match_tag)
        tagged = tagger.match_label(tagged, input_label, input_match_tag)
        tagged = tagger.add_default_entity_tags(tagged)

        iob_is_valid = True
        for line in tagged:
            # Slot 3 holds the tag; keep only the part before the first "-".
            prefixes = [token[3].split("-", 1)[0] for token in line]
            for pos, prefix in enumerate(prefixes):
                if prefix != "O":
                    continue
                if pos + 1 == len(prefixes):
                    following = "EOL"
                else:
                    following = prefixes[pos + 1]
                if following == "I":
                    iob_is_valid = False
        self.assertEqual(iob_is_valid, True)

Exemple #9

0

Afficher le fichier

def PIC_train():
    """Train the PIC model and then evaluate it.

    This is the step that applies PIC to the unlabeled training data that
    will later be used for bootstrapping.
      train set : config.trainPIC_path
      test set  : config.testPIC_path
    """
    pic_tagger = Tagger()
    pic_tagger.main_trainPIC()
    pic_tagger.evaluatePIC()

Exemple #10

0

Afficher le fichier

def main():
    """Parse the command-line options, build a ``Tagger`` and benchmark it
    on the file given with ``--file``."""
    parser = argparse.ArgumentParser(description='Part-of-Speech Tagging.')
    parser.add_argument(
        '--prefix', '-p',
        type=str,
        default='',
        help='specify prefix of files which will be used to store model',
    )
    parser.add_argument(
        '--times', '-t',
        type=int,
        default=1,
        help='specify iteration times',
    )
    parser.add_argument(
        '--all', '-a',
        action='store_true',
        help='without this switch, model will be trained by random sampled data',
    )
    parser.add_argument(
        '--file', '-f',
        type=str,
        default='',
        help='specify test data file',
    )
    parser.add_argument(
        '--save', '-s',
        action='store_true',
        help='enable this to save model file',
    )
    cli = parser.parse_args()

    # The third Tagger argument is the negation of --all.
    without_all = not cli.all
    tagger = Tagger('data/wsj00-18.pos', cli.times, without_all, cli.save,
                    cli.prefix)
    tagger.benchmark(Processor(cli.file))

Exemple #11

0

Afficher le fichier

def pos():
    """Run the tagger on the ``"text"`` field of the JSON request body and
    return the result serialised as JSON."""
    text = request.json["text"]
    tagged = Tagger().run(text)
    return json.dumps(tagged)

Exemple #12

0

Afficher le fichier

Fichier : huntag.py Projet : zbxzc35/HunTag

def main_tag(featureSet, options):
    """Tag input according to ``options`` and write the tagged sentences.

    Args:
        featureSet: Feature set handed to ``Tagger``.
        options: Parsed options object. ``modelName``, ``inFeatFile`` and
            ``input_dir`` are read here. Because ``vars(options)`` is the
            object's own attribute dict, the ``labelCounter``,
            ``featCounter`` and ``modelFile`` entries added below also
            become attributes of ``options``.

    Input source, checked in this order: ``options.inFeatFile``, then
    ``options.input_dir`` (output goes to ``<input_dir>_out/<name>.tagged``),
    then standard input.
    """
    labelCounter, featCounter = BookKeeper(), BookKeeper()
    labelCounter.readFromFile('{0}.labelNumbers'.format(options.modelName))
    featCounter.readFromFile('{0}.featureNumbers'.format(options.modelName))
    optionsDict = vars(options)
    optionsDict['labelCounter'] = labelCounter
    optionsDict['featCounter'] = featCounter
    optionsDict['modelFile'] = '{0}.model'.format(options.modelName)
    tagger = Tagger(featureSet, optionsDict)

    def write_with_comment(sen, comment):
        writeSentence(sen, comment=comment)

    if options.inFeatFile:
        tagged = tagger.tag_features(options.inFeatFile)
        writer_func = write_with_comment
    elif options.input_dir:
        assert isdir(options.input_dir), "--input-dir must be a directory"
        out_dir = "{}_out".format(options.input_dir)
        os.mkdir(out_dir)
        tagged = tagger.tag_dir(options.input_dir)

        def writer_func(sen, file_name):
            # Close the append handle after each sentence; the original
            # lambda opened a new file object per sentence and never
            # closed it.
            out_path = join(out_dir, '{}.tagged'.format(file_name))
            with open(out_path, 'a') as out:
                writeSentence(sen, out=out)
    else:
        tagged = tagger.tag_corp(sys.stdin)
        writer_func = write_with_comment

    for sen, other in tagged:
        writer_func(sen, other)

Exemple #13

0

Afficher le fichier

Fichier : wrapper.py Projet : qiuwei/Project_FOFE

 def learn(self, num_epochs, config_dict, seed):
     """Build a ``Tagger`` from this wrapper's settings and train it.

     Args:
         num_epochs: Passed to both the ``Tagger`` constructor and ``train``.
         config_dict: One chosen value per parameter; expanded as keyword
             arguments for both the constructor and ``train``.
         seed: Forwarded to ``train``.

     Returns:
         Whatever ``train`` returns. According to the original note this is
         a dict ``{epoch: (model, train_loss, dev_loss, test_loss, acc,
         f1_macro, f1_weighted)}`` -- TODO confirm against ``Tagger.train``.
     """
     tagger = Tagger(self.modelname, self.datafile, self.paramfile,
                     num_epochs, self.batchsize, **config_dict)
     return tagger.train(num_epochs, seed, **config_dict)

Exemple #14

0

Afficher le fichier

Fichier : main.py Projet : Gajenthran/NER-fr

def main(argv):
    """Run the lexer -> parser -> NER -> tagger pipeline.

    Args:
        argv: Argument list; ``argv[1]`` is read as the input file and
            ``argv[2]`` is passed to the tagger. Optional flags are
            position-sensitive:
              * ``-d`` is honoured only at ``argv[3]``,
              * ``-f`` at ``argv[3]`` or ``argv[4]``,
              * ``-t`` at ``argv[3]``, ``argv[4]`` or ``argv[5]``.
    """
    if len(argv) < 3:
        usage(argv)

    # Slices tolerate short argument lists, so no explicit length checks
    # are needed.
    dic = "-d" in argv[3:4]
    freq = "-f" in argv[3:5]
    own_tag = "-t" in argv[3:6]

    ex = Util.transform_text(Util.read_file(argv[1]))
    models = ["data/location.txt", "data/person.txt", "data/organisation.txt"]

    # Lexical analysis
    lexer = Lexer(ex, own_tag)
    lexer.lex()

    # Syntactic analysis
    parser = Parser(lexer.get_tokenized_text(), own_tag)
    parser.parse()

    # Semantic analysis + named-entity recognition
    ner = NER(ex, parser.get_parsed_text())
    if dic:
        ner.gen_models(models)
    ner.apply()

    # Tag the text
    tagger = Tagger(ner.get_ner(), ex)
    tag_method = tagger.freq_tag if freq else tagger.tag
    tag_method(argv[2])

Exemple #15

0

Afficher le fichier

Fichier : top_five.py Projet : Gkud/Projects

def test_run1(filename, test_product, no_of_clusters):
    """Cluster ``filename`` for ``test_product``, read the category of the
    result and pass everything to the converter.

    Args:
        filename: Forwarded to ``Cluster.test_run``.
        test_product: Mapping with at least a ``'category'`` key.
        no_of_clusters: Forwarded to ``Cluster.test_run``.
    """
    converter = Converter()
    clusterer = Cluster()
    tagger = Tagger()

    target_category = test_product['category']
    clustered = clusterer.test_run(filename, test_product, no_of_clusters)
    found_category = tagger.readCategory(clustered)
    converter.run('r200.txt', clustered, found_category, target_category,
                  'newSum1.txt')

Exemple #16

0

Afficher le fichier

Fichier : gui.py Projet : IbrahimEzzatEisa/Personal-Assistant

def brain(command):
    """Interpret a text command: answer a weather query or search/open a file.

    Args:
        command: The command text; it is tokenized, split on whitespace and
            passed to ``weatherFunction``.

    Returns:
        A weather sentence when ``'weather'`` is one of the whitespace-split
        words and ``weatherFunction`` returned something other than ``[]``;
        the fixed "satisfied" message after a search-type action. In the
        lines visible here every other path falls off the end (``None``).
        NOTE(review): the snippet may be truncated after the last line.
    """
    response = ""
    # NOTE(review): self-assignment, has no effect.
    command = command
    # Original note: entries 0..15 are search/find verbs, 16..21 are "open"
    # verbs.
    # NOTE(review): the slices used below are actions[:15] (indices 0-14,
    # "search".."observe") and actions[15:21] (indices 15-20,
    # "show".."watch"), which does not match that note -- confirm intent.
    actions = [
        "search", "find", "view", "reach", "detect", "get", "catch", "explore",
        "achieve", "obtain", "pass", "check", "reveal", "expose", "observe",
        "show", "see", "listen", "hear", "open", "watch", "arise", "awaken",
        "call", "consciousness", "get up", "stir", "wake", "wake up"
    ]

    tokens = Tokenizer().tokenize(command)

    # Call the weather function and answer directly when the command contains
    # the word "weather" and a country or city name was found.
    citiesORcountries = weatherFunction(command)
    if 'weather' in command.split() and citiesORcountries != []:
        return 'the weather in ' + citiesORcountries[0] + ' is ' + WeatherC(
        ).weatherForecast(citiesORcountries[0]) + ' today'

    action = None

    fileName = None
    # -----------------------------------<<Variable>>--------------------------------------------
    tagSentence = Tagger().tag(tokens)

    # Keep the last token found in `actions` as the action, and the last
    # token tagged 'NN' (that is not itself an action) as the file name.
    for counter in range(len(tagSentence)):
        # if tagSentence[counter][1] == 'VB' or tagSentence[counter][0] in self.actions:

        if tagSentence[counter][0] in actions:

            action = tagSentence[counter][0]

        elif tagSentence[counter][1] == 'NN':
            fileName = tagSentence[counter][0]

    # NOTE(review): `action` is still None here when no token matched;
    # confirm that snowBallStemmer accepts None.
    normlizeAction = Normalizer().snowBallStemmer(action)

    if normlizeAction in actions:
        filePath = FileSearch().search(
            fileName)  # returns a list of files sharing the same name

        # NOTE(review): filePath[0] below raises IndexError if the search
        # returns an empty list.
        if normlizeAction in actions[:15]:
            # Search-type action: open the part of the first hit before "//".
            OpenMedia().openFile(filePath[0].split("//")[0])
            response = "i hope you're satisfied with our service"
            return response

        if normlizeAction in actions[15:21]:
            # NOTE(review): the comparison below is between a string (the
            # text after the first '.') and a list, so it is always True,
            # and the guarded body is `pass`; this `if` therefore changes
            # nothing and the file is opened regardless. Presumably a media
            # extension check was intended -- confirm.
            if normlizeAction in [
                    'listen', 'hear', 'watch'
            ] and filePath[0].split('.')[1] != ['mp3', 'mp4', 'mkv']:

                pass
            OpenMedia().openFile(filePath[0])

Exemple #17

0

Afficher le fichier

Fichier : laucher.py Projet : dxcv/Individual-Project

def play_ner(feature_now, model_ver, poly, niter, logit, method):
    """Train the decision robot by playing the game for ``MAX_EPISODE``
    episodes.

    Args:
        feature_now: Iterable of feature indices (index ``FEATURE_SHAPE``).
        model_ver: Prefix of the per-feature model files and of the saved
            ``<model_ver>.npy`` weights file.
        poly, logit: Forwarded to ``RobotLSTMQ``.
        niter: Passed to each ``Tagger`` as ``epochs``.
        method: Forwarded to ``initialise_game``.

    Returns:
        The trained robot.

    Raises:
        SystemExit: when ``AGENT`` is not ``"LSTMQ"``.
    """
    actions = 2
    global BUDGET

    tf.reset_default_graph()
    if AGENT != "LSTMQ":
        print("** There is no robot.")
        raise SystemExit
    robot = RobotLSTMQ(actions, FEATURE, content=CONTENT, poly=poly,
                       logit=logit, ntype=NTYPE, expnum=EXPNUM)

    # One Tagger per selected feature, each initialised with an empty
    # training call.
    model_selected = []
    for feat in feature_now:
        scope = model_ver + '/feature_{0}'.format(feat)
        with tf.name_scope(scope):
            model = Tagger(model_file=scope,
                           n_input=FEATURE_SHAPE[feat][0],
                           n_steps=FEATURE_SHAPE[feat][1],
                           feature_number=feat,
                           epochs=niter,
                           expnum=EXPNUM)
        model.train([], [], feature_number=feat)
        model_selected.append(model)

    # NOTE(review): the module-level NITER is used here, not the `niter`
    # argument -- confirm.
    game = initialise_game(model_selected, BUDGET, NITER, FEATURE, method)

    # Play the game.
    episode = 1
    episode_rewards = []
    while episode <= MAX_EPISODE:
        observation = game.get_frame(model_selected)
        action = robot.get_action(observation)

        reward, observation2, terminal = game.feedback(action, model_selected)
        game.rAll.append(reward)
        episode_rewards.append(reward)

        robot.update(observation, action, reward, observation2, terminal)

        if terminal == True:
            print("> Episodes finished: ", float("%.3f" % (episode/MAX_EPISODE)), "> Reward: ", float("%.3f" % np.mean(episode_rewards)))
            episode += 1
            episode_rewards = []
            # Checked after the increment, so the save happens once the
            # counter reaches MAX_EPISODE, before that episode is played.
            if episode == MAX_EPISODE:
                print('in')
                # NOTE(review): saves under the global MODEL_VER, not the
                # `model_ver` argument -- confirm.
                robot.save_Q_network(MODEL_VER)
                weights = find_weight.find_weight(model_selected, game.dev_x_all, game.dev_y_all)
                np.save(model_ver+'.npy', weights)
                print(weights)
    return robot

Exemple #18

0

Afficher le fichier

    def download(self):
        """Download the best audio stream of ``self.url``, convert it to
        MP3, write the tags and return the result of renaming the file."""
        best_audio = pafy.new(self.url).getbestaudio()
        downloaded = best_audio.download()

        self.newtitle = self.slugify(best_audio.title)
        self.__convertToMp3(downloaded, best_audio.extension)

        mp3_name = self.newtitle + '.mp3'
        tagged_mp3 = Tagger(mp3_name, self.title, self.artist, self.genre,
                            self.album).editTags()
        return self.__renameFile(tagged_mp3)

Exemple #19

0

Afficher le fichier

Fichier : tagger_tests.py Projet : shubhampachori12110095/information-extraction-system

    def test_pos_tag_same_nr_tokens(self):
        """The total number of token tuples must be the same before and
        after POS tagging."""
        tagger = Tagger()
        source_lines, tagged_lines = self.pos_tag_get_results(tagger)

        source_total = sum(len(line) for line in source_lines)
        tagged_total = sum(len(line) for line in tagged_lines)
        self.assertEqual(source_total, tagged_total)

Exemple #20

0

Afficher le fichier

def AIC_train():
    """Bootstrapped AIC training: a main model plus ``config.model_num``
    bagging models per iteration.

    The training data used for bootstrapping must already have been
    PIC-processed via ``PIC_train``. ``config.iter`` is incremented at the
    end of every iteration.
    """
    for _ in range(1, config.boot_iter):
        print("iter : ", config.iter)
        main_tagger = Tagger()
        main_tagger.main_trainAIC()
        main_tagger.evaluateAIC("main_model")
        # Initial self-training data produced by the main model.
        self_training = main_tagger.main_taggingAIC(mode="self_tagging")

        # Train the bagging models.
        splited_labels, splited_features, splited_sentences = split_self_labeling(
            self_training[0], self_training[1], self_training[2])
        print("%s_iter(main) -> self_labled_s1 : %s" %
              (config.iter, len(self_training[0])))

        for model_idx in range(1, config.model_num + 1):
            part = model_idx - 1  # zero-based index into the split data
            print("model_idx : ", model_idx)
            bagging_tagger = Tagger()
            bagging_tagger.bagging_trainAIC("bagging_train", model_idx,
                                            splited_features[part],
                                            splited_labels[part])
            print("bagging model%s acc" % model_idx)
            # Evaluate each bagging model separately.
            bagging_tagger.evaluateAIC("bagging_eval", model_idx)
            bagging_bootstrap(model_idx, splited_sentences[part],
                              splited_labels[part])

            if model_idx == 1:
                score_i, raw_sentences, features = bagging_tagger.bagging_taggingAIC(
                    "self_tagging", model_idx)
            else:
                # NOTE(review): the first model's result is unpacked into
                # three values, but here the whole return value is
                # accumulated element by element -- confirm this is intended.
                new_score_i = bagging_tagger.bagging_taggingAIC(
                    "self_tagging", model_idx)
                for idx, _unused in enumerate(new_score_i):
                    score_i[idx] = np.asarray(score_i[idx]) + np.asarray(
                        new_score_i[idx])

        # Uses the tagger left over from the last bagging iteration.
        predicts = bagging_tagger.score2tag(score_i, raw_sentences, features)
        main_bootstrap(predicts)
        config.iter += 1

Exemple #21

0

Afficher le fichier

    def tag(self):
        """Populate ``self.lemmas``, using the JSON cache when allowed.

        The cache file ``self.lemma_file`` is read when it exists and
        ``'tag'`` is not listed in ``self.args.no_cache``; otherwise the
        tagger is run and its result is written to that file as JSON.
        """
        cache_usable = (os.path.exists(self.lemma_file)
                        and 'tag' not in self.args.no_cache)
        if cache_usable:
            print('Reading lemmas')
            with open(self.lemma_file, 'r') as cached:
                self.lemmas = json.load(cached)
            return

        print('Tagging')
        self.lemmas = Tagger(self.args.obt_path, self.promises).tag()

        with open(self.lemma_file, 'w') as sink:
            sink.write(json.dumps(self.lemmas))

Exemple #22

0

Afficher le fichier

Fichier : tagger_tests.py Projet : shubhampachori12110095/information-extraction-system

    def test_match_label_pos_labels_not_altered(self):
        """``match_label`` must leave slot 1 of every token tuple unchanged
        (the POS label, going by the test name)."""
        tagger = Tagger()
        source, result = self.match_label_get_results(tagger)

        unchanged = True
        for row_no, row in enumerate(result):
            for col_no, entry in enumerate(row):
                if entry[1] != source[row_no][col_no][1]:
                    unchanged = False

        self.assertEqual(unchanged, True)

Exemple #23

0

Afficher le fichier

Fichier : backend.py Projet : CubicrootXYZ/Parlismonitoring

    def __init__(self, config, tagger=False, scraper=False, prepare_db=False):
        """Store the configuration and optionally run the pipeline stages.

        Args:
            config: Configuration mapping; ``config['parlis']['url']`` is
                read when ``scraper`` is set.
            tagger: Run the tagger when true.
            scraper: Run the scraper when true (before the tagger).
            prepare_db: Prepare the database first; if
                ``_prepare_database`` returns a falsy value, nothing else
                is run.
        """
        self.config = config
        if prepare_db and not self._prepare_database():
            return

        if scraper:
            Scraper(self.config['parlis']['url']).run()
        if tagger:
            Tagger().run()

Exemple #24

0

Afficher le fichier

Fichier : tagger_tests.py Projet : shubhampachori12110095/information-extraction-system

    def test_pos_tag_nonlocalner_labels_not_altered(self):
        """POS tagging must leave slot 2 of every token tuple unchanged
        (the non-local NER label, going by the test name)."""
        tagger = Tagger()
        source, result = self.pos_tag_get_results(tagger)

        unchanged = True
        for row_no, row in enumerate(result):
            for col_no, entry in enumerate(row):
                if entry[2] != source[row_no][col_no][2]:
                    unchanged = False

        self.assertEqual(unchanged, True)

Exemple #25

0

Afficher le fichier

Fichier : tagger_tests.py Projet : shubhampachori12110095/information-extraction-system

    def test_pos_tag_same_token_strs_returned(self):
        """POS tagging must leave slot 0 (the token string) of every token
        tuple unchanged."""
        tagger = Tagger()
        source, result = self.pos_tag_get_results(tagger)

        unchanged = True
        for row_no, row in enumerate(result):
            for col_no, entry in enumerate(row):
                if entry[0] != source[row_no][col_no][0]:
                    unchanged = False

        self.assertEqual(unchanged, True)

Exemple #26

0

Afficher le fichier

Fichier : test_baselines.py Projet : dxcv/Individual-Project

def conservative_sampling(feature_now, model_ver, budget_test, cvit):
    """For every test child, query the samples on which the models'
    confidences differ least and train the models on them in mode B.

    Args:
        feature_now: Iterable of feature indices (index ``FEATURE_SHAPE``).
        model_ver: Prefix of the per-feature model files; also forwarded
            to ``write_test_csv``.
        budget_test: Number of samples to query per child; clamped to the
            number of available labels.
        cvit: Forwarded to ``write_test_csv``.
    """
    # The first whitespace-separated field of each line names a test child.
    with open('test_child.txt') as f:
        test_child = [line.split()[0] for line in f]

    print(">>>>>> Playing game ..")
    model_selected = []
    for feat in feature_now:
        scope = model_ver + '/feature_{0}'.format(feat)
        with tf.name_scope(scope):
            model = Tagger(model_file=scope,
                           n_input=FEATURE_SHAPE[feat][0],
                           n_steps=FEATURE_SHAPE[feat][1],
                           feature_number=feat)
        model_selected.append(model)

    for ID_student, child in enumerate(test_child):
        train_x_all, train_y_all = test_data(model_selected, feature_now,
                                             child)
        # The same data is used for testing.
        test_x_all, test_y_all = train_x_all, train_y_all

        N = len(train_y_all)
        # The candidate window uses the budget *before* it is clamped.
        sample_N = min(budget_test * 4, N)
        budget_test = min(budget_test, N)
        conf_diff = np.zeros((sample_N, ))

        confidence = [
            model.get_confidence(list(train_x_all[pos][:sample_N]))
            for pos, model in enumerate(model_selected)
        ]
        # Per sample: index of the most / least confident model.
        ind_max = np.argmax(confidence, axis=0)
        ind_min = np.argmin(confidence, axis=0)
        for j in range(sample_N):
            conf_diff[j] = (confidence[ind_max[j]][j]
                            - confidence[ind_min[j]][j])
        # Keep the samples with the smallest confidence spread.
        queried_indexs = sorted(range(len(conf_diff)),
                                key=lambda j: conf_diff[j])[:budget_test]

        for pos, model in enumerate(model_selected):
            model.train_mode_B(
                np.array(train_x_all[pos])[queried_indexs],
                np.array(train_y_all)[queried_indexs], feature_now[pos])
        print("training of mode B finished")
        write_test_csv(model_selected, ID_student, model_ver, cvit, test_x_all,
                       test_y_all)

Exemple #27

0

Afficher le fichier

Fichier : Summarizer_R.py Projet : QaisarRajput/TEXTA

def Get_TweetTags(data, no_tags, multi_tag_len, dict_path=None):
    """Return the best tags for ``data``.

    Args:
        data: Input handed to the tagger.
        no_tags: Second argument of the tagger call (per its name, the
            number of tags to return).
        multi_tag_len: Forwarded to ``tagger.Rater`` together with the
            weights.
        dict_path: Path of a pickled weights dictionary. When ``None`` the
            default ``BASE_DIR + '/Summarizer_Tagger/data/dict.pkl'`` is
            used.

    Returns:
        Whatever the ``Tagger`` instance returns when called.
    """
    if dict_path is None:
        dict_path = BASE_DIR + '/Summarizer_Tagger/data/dict.pkl'  # default dictionary

    # SECURITY: unpickling can execute arbitrary code; only load dictionary
    # files from trusted sources.
    # The context manager closes the file; the original left it open.
    with open(dict_path, 'rb') as dict_file:
        weights = pickle.load(dict_file)

    myreader = tagger.Reader()  # or your own reader class
    mystemmer = tagger.Stemmer()  # or your own stemmer class
    myrater = tagger.Rater(weights, multi_tag_len)  # or your own rater class
    mytagger = Tagger(myreader, mystemmer, myrater)
    best_tags = mytagger(data, no_tags)

    return best_tags

Exemple #28

0

Afficher le fichier

Fichier : tagger_tests.py Projet : shubhampachori12110095/information-extraction-system

    def test_pos_tag_4_slots_each_tuple(self):
        """The number of 4-slot tuples must be the same before and after
        POS tagging."""
        tagger = Tagger()
        source, result = self.pos_tag_get_results(tagger)

        source_four_slot = sum(
            1 for line in source for entry in line if len(entry) == 4)
        result_four_slot = sum(
            1 for line in result for entry in line if len(entry) == 4)
        self.assertEqual(source_four_slot, result_four_slot)

Exemple #29

0

Afficher le fichier

    def analyze(self, text, tokenizer=str.split):
        """Analyze ``text`` with the lazily created tagger.

        Args:
            text: string, the input text.
            tokenizer: callable used to tokenize the input; defaults to
                `str.split`. It is only used when the tagger is created,
                i.e. while ``self.tagger`` is still falsy; later calls
                reuse the existing tagger and ignore this argument.

        Returns:
            res: the result of the tagger's ``analyze`` (a dict, per the
            original documentation).
        """
        if self.tagger:
            return self.tagger.analyze(text)

        self.tagger = Tagger(self.model,
                             preprocessor=self.p,
                             tokenizer=tokenizer)
        return self.tagger.analyze(text)

Exemple #30

0

Afficher le fichier

Fichier : runner.py Projet : Yunif3/cell_viz

def run():
    """Listen for sentences forever, tag each one and process the tags.

    The loop stops on ``KeyboardInterrupt``. Any other exception raised
    while handling a sentence is logged and the loop moves on to the next
    sentence.
    """
    import logging  # local import: this block adds no file-level dependency

    log = logging.getLogger(__name__)

    print("start")
    listener = Recog()
    tagger = Tagger()
    print("done setting up")
    while True:
        try:
            sentence = listener.listen()
            # Example input:
            #   "make line graph using range from A1 to E4"
            tags = tagger.match_rules(sentence)
            print(tags)
            process(tags)
        except KeyboardInterrupt:
            break
        except Exception:
            # Keep the best-effort behaviour (do not crash the loop) but
            # record the failure instead of silently discarding it.
            log.exception("failed to handle sentence; continuing")