Example #1
0
    # Pipeline setup: load data -> embed titles -> cluster -> split into
    # train/test iterators -> derive vocabulary sizes and the padding index.
    # `loader` and `config` are defined outside this excerpt.

    # Fetch the data through the loader, passing the 'max_txt_length' and
    # 'samples' settings from the 'Dataloader' config section.
    data = loader.get_data(config['Dataloader']['max_txt_length'],
                           config['Dataloader']['samples'])

    # The embedder is configured entirely from the 'Embedder' config section.
    emb = Embedder(config['Embedder'])

    # Only the 'title' entry of the loaded data is embedded.
    embeddings = emb.get_embeddings(data['title'])

    # All Clustering settings are passed positionally, so their order must
    # match the Clustering constructor (defined elsewhere).
    # NOTE(review): the names suggest an output directory, output file names,
    # a plotting fraction and a mixture-model config -- confirm against the
    # Clustering class.
    clustering = Clustering(data, config['Clustering']['directory'],
                            config['Clustering']['cluster_picture_name'],
                            config['Clustering']['result_data_file_name'],
                            config['Clustering']['center_replics_file_name'],
                            config['Clustering']['part_to_plot'],
                            config['Clustering']['bgm_config'])

    # Run clustering on the title embeddings; the result is fed to Splitter.
    # NOTE(review): presumably a DataFrame (named `df`) -- confirm.
    df = clustering.get_clusters_and_final_data(embeddings)

    # Use the GPU when CUDA is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Splitter receives the clustering result, its settings from the
    # 'Splitter' config section (again positional) and the target device.
    splitter = Splitter(df, config['Splitter']['path_to_save_data'],
                        config['Splitter']['min_freq'],
                        config['Splitter']['test_size'],
                        config['Splitter']['batch_size'], device)

    # Unpacks exactly six values: two iterators, two datasets and the
    # source/target field objects.
    # NOTE(review): SRC/TRG look like torchtext-style Field objects (they
    # expose `.vocab` and `.pad_token` below) -- confirm.
    train_iterator, test_iterator, train_data, test_data, SRC, TRG = splitter.get_iterators_and_fields(
    )

    # Vocabulary sizes of the source and target fields.
    # NOTE(review): presumably used as model input/output dimensions later.
    input_dim = len(SRC.vocab)
    output_dim = len(TRG.vocab)
    # Index of the target field's padding token, looked up in the target
    # vocabulary's string-to-index mapping.
    trg_pad_idx = TRG.vocab.stoi[TRG.pad_token]