Beispiel #1
0
# Script: generates meta information about the corpus (see the log banner
# below). Only the parameter-loading and dataset-loading steps are visible here.

# `sys` must be imported before it is used for the path tweak and for the
# early exit further down; without this import the script stops with a
# NameError on its very first statement.
import sys

# Extend the module search path before the project imports below --
# presumably the packages lc/params/dataset/corpus live in this directory
# (TODO confirm against the Drive layout).
packagesPath = "/content/drive/My Drive/Colab Notebooks/packages/TextMining"
sys.path.append(packagesPath)

import lc as LC
import os, logging
from params.core import Core as Params
from dataset.core import Core as Dataset
from corpus.meta import Meta

logging.basicConfig(level=logging.INFO)
logging.info("# This script generates meta information about the corpus")

logging.info("# 1. Loading script params ")
logging.info("# ================================")
scriptParams = Params()
params = scriptParams.get()
# Store the parameters that were used inside the data directory.
scriptParams.save(params.data_directory)

logging.info("# 2. Preprocessing data")
logging.info("# ================================")

dataset = Dataset(params.dataset_name, params.data_directory)
# The percentage and item count are cast explicitly before being handed to
# the loader -- presumably the params object stores them as strings
# (TODO confirm in params.core).
datasetToProcess = dataset.get(float(params.dataset_percentage),
                               int(params.total_items))

if not datasetToProcess:
    logging.error('No dataset found')
    # Exit with a non-zero status so that callers (shell, CI, notebooks)
    # can tell that the run failed; a bare sys.exit() reports success.
    sys.exit(1)

data = datasetToProcess.getTrainingSet()
Beispiel #2
0
from __future__ import absolute_import, division, print_function, unicode_literals

import sys
packagesPath = "/content/drive/My Drive/Colab Notebooks/packages/TextMining"
sys.path.append(packagesPath)

import tensorflow_datasets as tfds
import tensorflow as tf
import os, logging, sys
import time
import numpy as np
import matplotlib.pyplot as plt

from params.core import Core as Params
from layers.positionalEncoding import PositionalEncoding

logging.basicConfig(level=logging.INFO)
scriptParams = Params()
params = scriptParams.get()

positionalEncoding = PositionalEncoding(params.source_max_sequence_length,
                                        params.dimensions)
positionalEmbedding = positionalEncoding.getEmbedding()
print(positionalEmbedding.shape)

plt.pcolormesh(positionalEmbedding[0], cmap='RdBu')
plt.xlabel('Depth')
plt.xlim((0, 512))
plt.ylabel('Position')
plt.colorbar()
plt.show()