def test_func():
    """Offline fitting of a regularized topic model on the UCI 'kos' collection.

    Runs several outer iterations of process/regularize/normalize via the
    low-level wrapper API, checks perplexity and Phi/Theta sparsity against
    reference values on every iteration, then prints top tokens per topic.

    Fix: this test still used Python-2-only syntax (``xrange``, ``print``
    statements, tuple-parameter lambda) although the sibling tests in this
    file are Python 3; ported to Python 3 with behavior unchanged.
    """
    # Set some constants
    data_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    dictionary_name = 'dictionary'
    pwt = 'pwt'
    nwt = 'nwt'
    rwt = 'rwt'
    docword = 'docword.kos.txt'
    vocab = 'vocab.kos.txt'

    smsp_phi_tau = -0.2
    smsp_theta_tau = -0.1
    decor_phi_tau = 1000000

    num_topics = 10
    num_inner_iterations = 10
    num_outer_iterations = 8

    perplexity_tol = 0.001
    expected_perplexity_value_on_iteration = {
        0: 6703.161, 1: 2426.277, 2: 2276.476, 3: 1814.072,
        4: 1742.911, 5: 1637.142, 6: 1612.946, 7: 1581.725
    }
    sparsity_tol = 0.001
    expected_phi_sparsity_value_on_iteration = {
        0: 0.059, 1: 0.120, 2: 0.212, 3: 0.306,
        4: 0.380, 5: 0.438, 6: 0.483, 7: 0.516
    }
    expected_theta_sparsity_value_on_iteration = {
        0: 0.009, 1: 0.036, 2: 0.146, 3: 0.239,
        4: 0.278, 5: 0.301, 6: 0.315, 7: 0.319
    }

    batches_folder = tempfile.mkdtemp()
    try:
        # Create the instance of low-level API and master object
        lib = artm.wrapper.LibArtm()

        # Parse collection from disk
        lib.ArtmParseCollection({
            'format': constants.CollectionParserConfig_Format_BagOfWordsUci,
            'docword_file_path': os.path.join(data_path, docword),
            'vocab_file_path': os.path.join(data_path, vocab),
            'target_folder': batches_folder
        })

        # Create master component and scores
        scores = {
            'Perplexity': messages.PerplexityScoreConfig(),
            'SparsityPhi': messages.SparsityPhiScoreConfig()
        }
        master = mc.MasterComponent(lib, scores=scores)
        master.create_score('SparsityTheta',
                            messages.SparsityThetaScoreConfig())
        master.create_score('TopTokens', messages.TopTokensScoreConfig())

        # Create collection dictionary and import it
        master.gather_dictionary(dictionary_target_name=dictionary_name,
                                 data_path=batches_folder,
                                 vocab_file_path=os.path.join(
                                     data_path, vocab))

        # Configure basic regularizers (taus are supplied at process time
        # or via regularize_model below)
        master.create_regularizer(name='SmoothSparsePhi',
                                  config=messages.SmoothSparsePhiConfig(),
                                  tau=0.0)
        master.create_regularizer(name='SmoothSparseTheta',
                                  config=messages.SmoothSparseThetaConfig(),
                                  tau=0.0)
        master.create_regularizer(name='DecorrelatorPhi',
                                  config=messages.DecorrelatorPhiConfig(),
                                  tau=0.0)

        # Initialize model
        master.initialize_model(
            model_name=pwt,
            topic_names=['topic_{}'.format(i) for i in range(num_topics)],
            dictionary_name=dictionary_name)

        for iteration in range(num_outer_iterations):
            # Invoke one scan of the collection, regularize and normalize Phi
            master.clear_score_cache()
            master.process_batches(pwt=pwt,
                                   nwt=nwt,
                                   num_inner_iterations=num_inner_iterations,
                                   batches_folder=batches_folder,
                                   regularizer_name=['SmoothSparseTheta'],
                                   regularizer_tau=[smsp_theta_tau])
            master.regularize_model(pwt, nwt, rwt,
                                    ['SmoothSparsePhi', 'DecorrelatorPhi'],
                                    [smsp_phi_tau, decor_phi_tau])
            master.normalize_model(pwt, nwt, rwt)

            # Retrieve scores
            perplexity_score = master.get_score('Perplexity')
            sparsity_phi_score = master.get_score('SparsityPhi')
            sparsity_theta_score = master.get_score('SparsityTheta')

            # Assert and print scores
            print_string = 'Iter#{0}'.format(iteration)
            print_string += ': Perplexity = {0:.3f}'.format(
                perplexity_score.value)
            print_string += ', Phi sparsity = {0:.3f}'.format(
                sparsity_phi_score.value)
            print_string += ', Theta sparsity = {0:.3f}'.format(
                sparsity_theta_score.value)
            print(print_string)

            assert abs(perplexity_score.value -
                       expected_perplexity_value_on_iteration[iteration]
                       ) < perplexity_tol
            assert abs(sparsity_phi_score.value -
                       expected_phi_sparsity_value_on_iteration[iteration]
                       ) < sparsity_tol
            assert abs(sparsity_theta_score.value -
                       expected_theta_sparsity_value_on_iteration[iteration]
                       ) < sparsity_tol

        # Retrieve and print top tokens score
        top_tokens_score = master.get_score('TopTokens')
        print('Top tokens per topic:')
        top_tokens_triplets = zip(
            top_tokens_score.topic_index,
            zip(top_tokens_score.token, top_tokens_score.weight))
        for topic_index, group in itertools.groupby(
                top_tokens_triplets, key=lambda triplet: triplet[0]):
            print_string = 'Topic#{0} : '.format(topic_index)
            for _, (token, weight) in group:
                print_string += ' {0}({1:.3f})'.format(token, weight)
            print(print_string)
    finally:
        shutil.rmtree(batches_folder)
def test_func():
    """Check collection- and document-level perplexity on the UCI 'kos' collection.

    Fits a smoothed/sparsed topic model with the low-level wrapper API and,
    on every outer iteration, compares the two perplexity variants (unigram
    collection model vs. default unigram document model) and the zero-words
    count against reference values.

    Fix: removed a leftover debug ``print`` that duplicated the asserted
    collection-perplexity values; renamed the loop variable so it no longer
    shadows the ``iter`` builtin; compare zero-words with ``==`` instead of
    ``a - b == 0``.
    """
    # Set some constants
    data_path = os.environ.get('BIGARTM_UNITTEST_DATA')
    dictionary_name = 'dictionary'
    pwt = 'pwt'
    nwt = 'nwt'
    rwt = 'rwt'
    docword = 'docword.kos.txt'
    vocab = 'vocab.kos.txt'

    num_topics = 10
    num_document_passes = 10
    num_outer_iterations = 8

    smsp_phi_tau = -20.0
    smsp_theta_tau = -3.0

    perplexity_tol = 1.0
    expected_perp_col_value_on_iteration = {
        0: 6650.1, 1: 2300.2, 2: 1996.8, 3: 1786.1,
        4: 1692.7, 5: 1644.4, 6: 1612.3, 7: 1589.5
    }
    expected_perp_doc_value_on_iteration = {
        0: 6614.6, 1: 2295.0, 2: 1996.4, 3: 1786.1,
        4: 1692.7, 5: 1644.2, 6: 1611.7, 7: 1588.6
    }
    expected_perp_zero_words_on_iteration = {
        0: 494, 1: 210, 2: 24, 3: 0, 4: 2, 5: 10, 6: 28, 7: 47
    }

    batches_folder = tempfile.mkdtemp()
    try:
        # Create the instance of low-level API and master object
        lib = artm.wrapper.LibArtm()

        # Parse collection from disk
        lib.ArtmParseCollection({
            'format': constants.CollectionParserConfig_CollectionFormat_BagOfWordsUci,
            'docword_file_path': os.path.join(data_path, docword),
            'vocab_file_path': os.path.join(data_path, vocab),
            'target_folder': batches_folder
        })

        # Create master component and scores; the collection-model perplexity
        # variant needs the dictionary name to obtain token frequencies.
        perplexity_config = messages.PerplexityScoreConfig()
        perplexity_config.model_type = constants.PerplexityScoreConfig_Type_UnigramCollectionModel
        perplexity_config.dictionary_name = dictionary_name
        scores = {
            'PerplexityDoc': messages.PerplexityScoreConfig(),
            'PerplexityCol': perplexity_config
        }
        master = mc.MasterComponent(lib, scores=scores)

        # Create collection dictionary and import it
        master.gather_dictionary(dictionary_target_name=dictionary_name,
                                 data_path=batches_folder,
                                 vocab_file_path=os.path.join(
                                     data_path, vocab))

        # Configure basic regularizers
        master.create_regularizer(name='SmoothSparsePhi',
                                  config=messages.SmoothSparsePhiConfig(
                                      dictionary_name=dictionary_name),
                                  tau=0.0)
        master.create_regularizer(name='SmoothSparseTheta',
                                  config=messages.SmoothSparseThetaConfig(),
                                  tau=0.0)

        # Initialize model
        master.initialize_model(
            model_name=pwt,
            topic_names=['topic_{}'.format(i) for i in range(num_topics)],
            dictionary_name=dictionary_name)

        for iteration in range(num_outer_iterations):
            # Invoke one scan of the collection, regularize and normalize Phi
            master.clear_score_cache()
            master.process_batches(pwt=pwt,
                                   nwt=nwt,
                                   num_document_passes=num_document_passes,
                                   batches_folder=batches_folder,
                                   regularizer_name=['SmoothSparseTheta'],
                                   regularizer_tau=[smsp_theta_tau])
            master.regularize_model(pwt, nwt, rwt,
                                    ['SmoothSparsePhi'], [smsp_phi_tau])
            master.normalize_model(pwt, nwt, rwt)

            # Retrieve perplexity scores
            perplexity_doc_score = master.get_score('PerplexityDoc')
            perplexity_col_score = master.get_score('PerplexityCol')

            # Assert and print scores
            string = 'Iter#{0}'.format(iteration)
            string += ': Collection perp. = {0:.1f}'.format(
                perplexity_col_score.value)
            string += ', Document perp. = {0:.1f}'.format(
                perplexity_doc_score.value)
            string += ', Zero words = {0}'.format(
                perplexity_doc_score.zero_words)
            print(string)

            assert abs(perplexity_col_score.value -
                       expected_perp_col_value_on_iteration[iteration]
                       ) < perplexity_tol
            assert abs(perplexity_doc_score.value -
                       expected_perp_doc_value_on_iteration[iteration]
                       ) < perplexity_tol
            assert perplexity_doc_score.zero_words == \
                expected_perp_zero_words_on_iteration[iteration]
    finally:
        shutil.rmtree(batches_folder)
def test_func():
    """Online topic-model fitting on 'kos' using merge_model mini-batch updates.

    Batches are accumulated into groups of ``update_every``; each group is
    processed into an increment matrix (nwt_hat) which is blended into the
    running n_wt with decay/apply weights before re-normalizing Phi.  The
    very first perplexity value is checked against a set of acceptable
    outcomes, and top-token weights are checked at the end.
    """
    # Constants of the experiment
    data_path = os.environ.get('BIGARTM_UNITTEST_DATA')
    dictionary_name = 'dictionary'
    pwt = 'pwt'
    nwt = 'nwt'
    nwt_hat = 'nwt_hat'
    docword = 'docword.kos.txt'
    vocab = 'vocab.kos.txt'

    num_topics = 10
    num_document_passes = 10
    num_outer_iterations = 8
    num_processors = 2

    decay_weight = 0.7
    apply_weight = 0.3
    num_batches = 2

    top_tokens_value = 0.5
    top_tokens_tol = 0.5
    # Any member of this set is an acceptable first-update perplexity.
    perplexity_first_value = set([6714.673, 6710.324, 6706.906, 6710.120,
                                  6710.327, 6717.755, 6717.757, 6698.847,
                                  6710.120, 6714.667, 6698.852, 6706.903])

    batches_folder = tempfile.mkdtemp()
    try:
        # Low-level API handle
        lib = artm.wrapper.LibArtm()

        # Turn the UCI bag-of-words files into binary batches on disk
        lib.ArtmParseCollection({
            'format': constants.CollectionParserConfig_CollectionFormat_BagOfWordsUci,
            'docword_file_path': os.path.join(data_path, docword),
            'vocab_file_path': os.path.join(data_path, vocab),
            'target_folder': batches_folder
        })

        # Master component with perplexity and top-tokens scores
        scores = {
            'Perplexity': messages.PerplexityScoreConfig(),
            'TopTokens': messages.TopTokensScoreConfig()
        }
        master = mc.MasterComponent(lib,
                                    num_processors=num_processors,
                                    scores=scores)

        # Build and import the collection dictionary
        master.gather_dictionary(dictionary_target_name=dictionary_name,
                                 data_path=batches_folder,
                                 vocab_file_path=os.path.join(data_path,
                                                              vocab))

        # Random initialization of the Phi matrix
        master.initialize_model(
            model_name=pwt,
            topic_names=['topic_{}'.format(i) for i in range(num_topics)],
            dictionary_name=dictionary_name)

        # Collect the file names of the batches to process
        batches = [os.path.join(batches_folder, name)
                   for name in os.listdir(batches_folder)
                   if os.path.splitext(name)[1] == '.batch']

        # Perform the online iterations
        update_every = num_processors
        pending_batches = []
        for epoch in range(num_outer_iterations):
            for position, batch_filename in enumerate(batches, start=1):
                pending_batches.append(batch_filename)
                # Flush a group at every update point (or at the tail)
                if position % update_every != 0 and position != len(batches):
                    continue

                # Process the accumulated group into nwt_hat, merge it into
                # nwt with decay/apply weights and re-normalize Phi
                master.clear_score_cache()
                master.process_batches(pwt, nwt_hat, num_document_passes,
                                       batches=pending_batches)
                master.merge_model({nwt: decay_weight, nwt_hat: apply_weight},
                                   nwt=nwt)
                master.normalize_model(pwt, nwt)

                # Retrieve and print perplexity score
                perplexity_score = master.get_score('Perplexity')
                if epoch == 0 and position == 1:
                    assert perplexity_score.value in perplexity_first_value
                assert len(pending_batches) == num_batches

                print('Iteration = {0},Perplexity = {1:.3f}, num batches = {2}'.format(
                    epoch, perplexity_score.value, len(pending_batches)))
                pending_batches = []

        # Retrieve and print top tokens score
        top_tokens_score = master.get_score('TopTokens')
        print('Top tokens per topic:')
        triplets = zip(top_tokens_score.topic_index,
                       zip(top_tokens_score.token, top_tokens_score.weight))
        for topic_index, group in itertools.groupby(triplets,
                                                    key=lambda t: t[0]):
            line = 'Topic#{0} : '.format(topic_index)
            for _, (token, weight) in group:
                line += ' {0}({1:.3f})'.format(token, weight)
                assert abs(weight - top_tokens_value) < top_tokens_tol
            print(line)
    finally:
        shutil.rmtree(batches_folder)
def test_func():
    """Repeat the same offline fit for several processor counts and time it.

    For every entry of ``num_processors_list`` a fresh master component is
    created and trained from scratch; each iteration's perplexity must match
    the (deterministic) reference value and the wall time is reported.
    """
    # Experiment constants
    data_path = os.environ.get('BIGARTM_UNITTEST_DATA')
    dictionary_name = 'dictionary'
    pwt = 'pwt'
    nwt = 'nwt'
    docword = 'docword.kos.txt'
    vocab = 'vocab.kos.txt'

    num_processors_list = [4, 2, 1]
    num_topics = 10
    num_document_passes = 10
    num_outer_iterations = 5

    perplexity_tol = 0.001
    expected_perplexity_value_on_iteration = {
        0: 6710.208, 1: 2434.135, 2: 2202.418, 3: 1936.493, 4: 1774.600
    }

    batches_folder = tempfile.mkdtemp()
    try:
        # Low-level API handle
        lib = artm.wrapper.LibArtm()

        # Convert the UCI files into binary batches once, reused by all runs
        lib.ArtmParseCollection({
            'format': constants.CollectionParserConfig_CollectionFormat_BagOfWordsUci,
            'docword_file_path': os.path.join(data_path, docword),
            'vocab_file_path': os.path.join(data_path, vocab),
            'target_folder': batches_folder
        })

        for num_processors in num_processors_list:
            # Fresh master component and perplexity score for this run.
            # NOTE(review): num_processors is never passed to MasterComponent
            # here (unlike the online test above), so every run uses the same
            # default configuration — confirm whether this is intentional.
            scores = {'PerplexityScore': messages.PerplexityScoreConfig()}
            master = mc.MasterComponent(lib, scores=scores)

            # Build and import the collection dictionary
            master.gather_dictionary(dictionary_target_name=dictionary_name,
                                     data_path=batches_folder,
                                     vocab_file_path=os.path.join(data_path,
                                                                  vocab))

            # Random initialization of Phi
            master.initialize_model(
                model_name=pwt,
                topic_names=['topic_{}'.format(i) for i in range(num_topics)],
                dictionary_name=dictionary_name)

            times = []
            for iteration in range(num_outer_iterations):
                start = time.time()

                # One full pass over the collection, then normalize Phi
                master.clear_score_cache()
                master.process_batches(pwt, nwt, num_document_passes,
                                       batches_folder)
                master.normalize_model(pwt, nwt)

                # Retrieve and check the perplexity score
                perplexity_score = master.get_score('PerplexityScore')
                end = time.time()

                assert abs(expected_perplexity_value_on_iteration[iteration] -
                           perplexity_score.value) < perplexity_tol

                times.append(end - start)
                print('Iter#{0}: Perplexity = {1:.3f}, Time = {2:.3f}'.format(
                    iteration, perplexity_score.value, end - start))

            print('Average time per iteration = {0:.3f}'.format(
                float(sum(times)) / len(times)))
    finally:
        shutil.rmtree(batches_folder)
def test_func():
    """Offline + online fitting via the high-level fit_offline/fit_online API.

    First performs several fit_offline passes, checking perplexity and
    Phi/Theta sparsity per iteration, then one fit_online pass with a single
    update point and one with four update points, checking the cached score
    arrays along the way.
    """
    # Set some constants
    data_path = os.environ.get('BIGARTM_UNITTEST_DATA')
    dictionary_name = 'dictionary'
    pwt = 'pwt'
    nwt = 'nwt'
    docword = 'docword.kos.txt'
    vocab = 'vocab.kos.txt'

    # Regularizer coefficients (negative taus sparse Phi/Theta)
    smsp_phi_tau = -0.2
    smsp_theta_tau = -0.1
    decor_phi_tau = 1000000

    num_topics = 10
    num_document_passes = 10
    num_outer_iterations = 8

    # Reference values for the offline passes
    perplexity_tol = 0.001
    expected_perplexity_value_on_iteration = {
        0: 6703.161,
        1: 2426.277,
        2: 2276.476,
        3: 1814.072,
        4: 1742.911,
        5: 1637.142,
        6: 1612.946,
        7: 1581.725
    }
    sparsity_tol = 0.001
    expected_phi_sparsity_value_on_iteration = {
        0: 0.059,
        1: 0.120,
        2: 0.212,
        3: 0.306,
        4: 0.380,
        5: 0.438,
        6: 0.483,
        7: 0.516
    }
    expected_theta_sparsity_value_on_iteration = {
        0: 0.009,
        1: 0.036,
        2: 0.146,
        3: 0.239,
        4: 0.278,
        5: 0.301,
        6: 0.315,
        7: 0.319
    }
    # Reference values after the single online pass
    expected_perplexity_value_online = 1572.268
    expected_phi_sparsity_value_online = 0.528
    expected_theta_sparsity_value_online = 0.320

    batches_folder = tempfile.mkdtemp()
    try:
        # Create the instance of low-level API and master object
        lib = artm.wrapper.LibArtm()

        # Parse collection from disk
        lib.ArtmParseCollection({
            'format': constants.CollectionParserConfig_CollectionFormat_BagOfWordsUci,
            'docword_file_path': os.path.join(data_path, docword),
            'vocab_file_path': os.path.join(data_path, vocab),
            'target_folder': batches_folder
        })

        # Create master component and scores
        scores = {
            'Perplexity': messages.PerplexityScoreConfig(),
            'SparsityPhi': messages.SparsityPhiScoreConfig()
        }
        master = mc.MasterComponent(lib,
                                    scores=scores,
                                    num_document_passes=num_document_passes)
        master.create_score('SparsityTheta',
                            messages.SparsityThetaScoreConfig())
        master.create_score('TopTokens', messages.TopTokensScoreConfig())

        # Create collection dictionary and import it
        master.gather_dictionary(dictionary_target_name=dictionary_name,
                                 data_path=batches_folder,
                                 vocab_file_path=os.path.join(
                                     data_path, vocab))

        # Configure basic regularizers (created with tau=0.0, then the taus
        # are set through reconfigure_regularizer below)
        master.create_regularizer(name='SmoothSparsePhi',
                                  config=messages.SmoothSparsePhiConfig(),
                                  tau=0.0)
        master.create_regularizer(name='SmoothSparseTheta',
                                  config=messages.SmoothSparseThetaConfig(),
                                  tau=0.0)
        master.create_regularizer(name='DecorrelatorPhi',
                                  config=messages.DecorrelatorPhiConfig(),
                                  tau=decor_phi_tau)
        master.reconfigure_regularizer(name='SmoothSparsePhi',
                                       tau=smsp_phi_tau)
        master.reconfigure_regularizer(name='SmoothSparseTheta',
                                       tau=smsp_theta_tau)

        # Initialize model
        master.initialize_model(
            model_name=pwt,
            topic_names=['topic_{}'.format(i) for i in range(num_topics)],
            dictionary_name=dictionary_name)

        for iter in range(num_outer_iterations):
            # One full offline pass over the collection
            master.fit_offline(batches_folder=batches_folder,
                               num_collection_passes=1)

            # Retrieve scores
            perplexity_score = master.get_score('Perplexity')
            sparsity_phi_score = master.get_score('SparsityPhi')
            sparsity_theta_score = master.get_score('SparsityTheta')

            # Assert and print scores
            print_string = 'Iter#{0}'.format(iter)
            print_string += ': Perplexity = {0:.3f}'.format(
                perplexity_score.value)
            print_string += ', Phi sparsity = {0:.3f}'.format(
                sparsity_phi_score.value)
            print_string += ', Theta sparsity = {0:.3f}'.format(
                sparsity_theta_score.value)
            print(print_string)

            assert abs(
                perplexity_score.value -
                expected_perplexity_value_on_iteration[iter]) < perplexity_tol
            assert abs(
                sparsity_phi_score.value -
                expected_phi_sparsity_value_on_iteration[iter]) < sparsity_tol
            assert abs(sparsity_theta_score.value -
                       expected_theta_sparsity_value_on_iteration[iter]
                       ) < sparsity_tol

            # The score array accumulates one entry per offline pass
            perplexity_scores = master.get_score_array('Perplexity')
            assert len(perplexity_scores) == (iter + 1)

        # proceed one online iteration
        batch_filenames = glob.glob(os.path.join(batches_folder, '*.batch'))
        master.fit_online(batch_filenames=batch_filenames,
                          update_after=[4],
                          apply_weight=[0.5],
                          decay_weight=[0.5])

        # Retrieve scores
        perplexity_score = master.get_score('Perplexity')
        sparsity_phi_score = master.get_score('SparsityPhi')
        sparsity_theta_score = master.get_score('SparsityTheta')

        # Assert and print scores
        print_string = 'Iter Online'
        print_string += ': Perplexity = {0:.3f}'.format(
            perplexity_score.value)
        print_string += ', Phi sparsity = {0:.3f}'.format(
            sparsity_phi_score.value)
        print_string += ', Theta sparsity = {0:.3f}'.format(
            sparsity_theta_score.value)
        print(print_string)

        assert abs(perplexity_score.value -
                   expected_perplexity_value_online) < perplexity_tol
        assert abs(sparsity_phi_score.value -
                   expected_phi_sparsity_value_online) < sparsity_tol
        assert abs(sparsity_theta_score.value -
                   expected_theta_sparsity_value_online) < sparsity_tol

        # Retrieve and print top tokens score
        top_tokens_score = master.get_score('TopTokens')
        print('Top tokens per topic:')
        top_tokens_triplets = zip(
            top_tokens_score.topic_index,
            zip(top_tokens_score.token, top_tokens_score.weight))
        for topic_index, group in itertools.groupby(
                top_tokens_triplets, key=lambda triplet: triplet[0]):
            print_string = 'Topic#{0} : '.format(topic_index)
            for _, (token, weight) in group:
                print_string += ' {0}({1:.3f})'.format(token, weight)
            print(print_string)

        # An online pass with four update points must cache four score
        # entries (one per update)
        master.clear_score_array_cache()
        master.fit_online(batch_filenames=batch_filenames,
                          update_after=[1, 2, 3, 4],
                          apply_weight=[0.5, 0.5, 0.5, 0.5],
                          decay_weight=[0.5, 0.5, 0.5, 0.5])
        perplexity_scores = master.get_score_array('Perplexity')
        assert len(perplexity_scores) == 4
    finally:
        shutil.rmtree(batches_folder)
def test_func():
    """Fit a topic model on a synthetic collection with a planted structure.

    One batch of 100 items over 60 tokens is generated: tokens 0-39 receive
    a large weight only in items whose id matches the token modulo 10, while
    tokens 40-59 carry small varying background counts.  The fitted model's
    perplexity and top-token weights are checked against reference values.
    """
    # Experiment constants
    num_tokens = 60
    num_items = 100
    pwt = 'pwt'
    nwt = 'nwt'

    num_topics = 10
    num_document_passes = 10
    num_outer_iterations = 10
    num_top_tokens = 4

    perplexity_tol = 0.001
    expected_perplexity_value_on_iteration = {
        0: 54.616, 1: 38.472, 2: 28.655, 3: 24.362, 4: 22.355,
        5: 21.137, 6: 20.808, 7: 20.791, 8: 20.746, 9: 20.581
    }
    top_tokens_tol = 0.05
    expected_top_tokens_weight = 0.1

    dictionary_name = 'dictionary'
    batches_folder = tempfile.mkdtemp()
    try:
        # Generate the small synthetic collection as a single batch
        batch = messages.Batch()
        batch.id = str(uuid.uuid4())
        for token_id in range(num_tokens):
            batch.token.append('token_{0}'.format(token_id))

        for item_id in range(num_items):
            item = batch.item.add()
            item.id = item_id
            for token_id in range(num_tokens):
                item.token_id.append(token_id)
                # Tokens 40+ get small varying background counts; tokens
                # below 40 get a large weight only when the token and item
                # ids agree modulo 10 (one planted topic per residue class).
                if token_id >= 40:
                    weight = (item_id + token_id) % 5 + 1
                elif token_id % 10 == item_id % 10:
                    weight = num_topics
                else:
                    weight = 0
                item.token_weight.append(weight)

        # Low-level API handle; persist the batch to disk
        lib = artm.wrapper.LibArtm()
        lib.ArtmSaveBatch(batches_folder, batch)

        # Master component with perplexity and top-tokens scores
        scores = {
            'PerplexityScore': messages.PerplexityScoreConfig(),
            'TopTokensScore':
                messages.TopTokensScoreConfig(num_tokens=num_top_tokens)
        }
        master = mc.MasterComponent(lib, scores=scores)

        # Build and import the collection dictionary
        master.gather_dictionary(dictionary_target_name=dictionary_name,
                                 data_path=batches_folder)

        # Random initialization of Phi
        master.initialize_model(
            model_name=pwt,
            topic_names=['topic_{}'.format(i) for i in range(num_topics)],
            dictionary_name=dictionary_name)

        for iteration in range(num_outer_iterations):
            # One pass over the collection, then normalize Phi
            master.clear_score_cache()
            master.process_batches(pwt, nwt, num_document_passes,
                                   batches_folder)
            master.normalize_model(pwt, nwt)

            # Retrieve and check the perplexity score
            perplexity_score = master.get_score('PerplexityScore')
            assert abs(perplexity_score.value -
                       expected_perplexity_value_on_iteration[iteration]
                       ) < perplexity_tol
            print('Iteration#{0} : Perplexity = {1:.3f}'.format(
                iteration, perplexity_score.value))

        # Retrieve and print top tokens score
        top_tokens_score = master.get_score('TopTokensScore')
        print('Top tokens per topic:')
        triplets = zip(top_tokens_score.topic_index,
                       zip(top_tokens_score.token, top_tokens_score.weight))
        for topic_index, group in itertools.groupby(triplets,
                                                    key=lambda t: t[0]):
            line = 'Topic#{0} : '.format(topic_index)
            for _, (token, weight) in group:
                assert abs(weight - expected_top_tokens_weight) < top_tokens_tol
                line += ' {0}({1:.3f})'.format(token, weight)
            print(line)
    finally:
        shutil.rmtree(batches_folder)