def load_text_pairs(num_pairs=50):
    """Load text pairs through the ``data_getter`` helper.

    Each text pair corresponds to a single line in the underlying text
    file; the left and right texts in a pair are separated by a tab
    character, and the file is assumed to use UTF-8 encoding.

    :param num_pairs: number of text pairs to load. Defaults to 50,
        the value that was previously hard-coded, so existing callers
        are unaffected.
    :return: a 2-element tuple — the first element is the list of left
        texts, the second the corresponding list of right texts.
    """
    # data_getter.get_files is expected to return the two parallel
    # lists already split — TODO confirm against data_getter's API,
    # since other call sites in this file unpack it differently.
    input_texts, target_texts = data_getter.get_files(num_pairs)
    return input_texts, target_texts
# %matplotlib inline
import re
import string
from math import ceil

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from keras.layers import Input, LSTM, Embedding, Dense
from keras.models import Model
from rouge import Rouge

import data_getter

batch_size = 20
epochs = 10
num_textos = 100  # number of source documents to load

# Each element returned by get_files is a (source_text, target_text)
# pair — presumably; verify against data_getter.
raw_data = data_getter.get_files(num_textos)
input_texts = [pair[0] for pair in raw_data]
target_texts = [pair[1] for pair in raw_data]


def _normalize(text):
    """Lower-case *text* and strip single/double quote characters.

    Equivalent to the previous three-pass cleanup (lower-casing, then
    removing apostrophes, then removing double quotes) in one step.
    """
    return re.sub('[\'"]', '', text.lower())


input_texts = [_normalize(t) for t in input_texts]
target_texts = [_normalize(t) for t in target_texts]

# `import string` was missing in the original, making this line raise
# NameError; fixed above.
exclude = set(string.punctuation)  # Set of all special characters
from __future__ import print_function from keras.models import Model from keras.layers import Input, LSTM, Dense import numpy as np import data_getter batch_size = 64 # Batch size for training. epochs = 10 # Number of epochs to train for. latent_dim = 10 # Latent dimensionality of the encoding space. num_samples = 100 # Number of samples to train on. # Path to the data txt file on disk. data_path = 'fra-eng/fra.txt' # Vectorize the data. input_texts, target_texts = data_getter.get_files(num_samples) input_characters = set() target_characters = set() # with open(data_path, 'r', encoding='utf-8') as f: # lines = f.read().split('\n') for text, tag in zip(input_texts, target_texts): for char in text: if char not in input_characters: input_characters.add(char) for char in tag: if char not in target_characters: target_characters.add(char) input_characters = sorted(list(input_characters)) target_characters = sorted(list(target_characters)) num_encoder_tokens = len(input_characters) num_decoder_tokens = len(target_characters)
batch_size = 64 # Batch size for training. epochs = 100 # Number of epochs to train for. latent_dim = 256 # Latent dimensionality of the encoding space. num_samples = 7000 # Number of samples to train on. # Path to the data txt file on disk. data_path = '/home/maxtelll/Downloads/fra.txt' # Vectorize the data. input_texts = [] target_texts = [] input_characters = set() target_characters = set() # with open(data_path, 'r', encoding='utf-8') as f: # lines = f.read().split('\n') lines = get_files(num_samples) for line in lines[:min(num_samples, len(lines) - 1)]: # input_text, target_text = line.split('\t') # We use "tab" as the "start sequence" character # for the targets, and "\n" as "end sequence" character. input_text, target_text = line target_text = '\t' + target_text + '\n' input_texts.append(input_text) target_texts.append(target_text) for char in input_text: if char not in input_characters: input_characters.add(char) for char in target_text: