# Script section: load the run parameters and fetch the training set that
# the corpus meta-information step works on.
import sys  # required by sys.path.append / sys.exit below

# The project packages live on the mounted Drive; add them to the import
# path BEFORE importing any project module (lc, params, dataset, corpus).
packagesPath = "/content/drive/My Drive/Colab Notebooks/packages/TextMining"
sys.path.append(packagesPath)

import lc as LC
import os
import logging

from params.core import Core as Params
from dataset.core import Core as Dataset
from corpus.meta import Meta

logging.basicConfig(level=logging.INFO)

logging.info("# This script generates meta information about the corpus")

logging.info("# 1. Loading script params ")
logging.info("# ================================")
scriptParams = Params()
params = scriptParams.get()
# Persist the parameters used for this run; the argument passed is the
# configured data directory (presumably the save location -- confirm in Params).
scriptParams.save(params.data_directory)

logging.info("# 2. Preprocessing data")
logging.info("# ================================")
dataset = Dataset(params.dataset_name, params.data_directory)
# The percentage and item count are cast explicitly to float / int before
# being handed to Dataset.get.
datasetToProcess = dataset.get(
    float(params.dataset_percentage),
    int(params.total_items),
)

if not datasetToProcess:
    logging.error('No dataset found')
    # Exit with a non-zero status so callers can detect the failure
    # (a bare sys.exit() would report success).
    sys.exit(1)

data = datasetToProcess.getTrainingSet()
from __future__ import absolute_import, division, print_function, unicode_literals import sys packagesPath = "/content/drive/My Drive/Colab Notebooks/packages/TextMining" sys.path.append(packagesPath) import tensorflow_datasets as tfds import tensorflow as tf import os, logging, sys import time import numpy as np import matplotlib.pyplot as plt from params.core import Core as Params from layers.positionalEncoding import PositionalEncoding logging.basicConfig(level=logging.INFO) scriptParams = Params() params = scriptParams.get() positionalEncoding = PositionalEncoding(params.source_max_sequence_length, params.dimensions) positionalEmbedding = positionalEncoding.getEmbedding() print(positionalEmbedding.shape) plt.pcolormesh(positionalEmbedding[0], cmap='RdBu') plt.xlabel('Depth') plt.xlim((0, 512)) plt.ylabel('Position') plt.colorbar() plt.show()