Example #1
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Copyright Nils Schaetti <*****@*****.**>

# Imports
import torch.utils.data
import dataset
from echotorch.transforms import text

# Author identification dataset
pan18loader = torch.utils.data.DataLoader(
    dataset.AuthorIdentificationDataset(root="./data/", download=True, transform=text.GloveVector(), problem=1),
    batch_size=1, shuffle=True)

# Get training data for this fold
for i, data in enumerate(pan18loader):
    # Inputs and labels
    inputs, labels = data
# end for
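
A minimal sketch of how the string author labels yielded by this loader could be turned into integer class targets, assuming the underlying dataset exposes an authors list as in the later examples (the target construction here is illustrative, not part of the original snippet):

# Illustrative: build an author -> index lookup and convert one batch's label
author_to_idx = {author: idx for idx, author in enumerate(pan18loader.dataset.authors)}
for i, data in enumerate(pan18loader):
    inputs, labels = data
    targets = torch.LongTensor([author_to_idx[labels[0]]])
    break
# end for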
Example #2

    # Generate the sparse reservoir weight matrix (etnn is echotorch.nn;
    # reservoir_size, w_sparsity, transformer and args are defined earlier in
    # the original script)
    w = etnn.ESNCell.generate_w(reservoir_size, w_sparsity)

    # Sample average
    single_sample_average = np.array([])

    # For each problem
    for problem in np.arange(1, 3):
        # Truth and prediction
        y_true = np.array([])
        y_pred = np.array([])

        # Author identification training dataset
        pan18loader_training = torch.utils.data.DataLoader(
            dataset.AuthorIdentificationDataset(root="./data/",
                                                download=True,
                                                transform=transformer,
                                                problem=problem,
                                                lang=args.lang),
            batch_size=1,
            shuffle=True)

        # Author identification test dataset
        pan18loader_test = torch.utils.data.DataLoader(
            dataset.AuthorIdentificationDataset(root="./data/",
                                                download=True,
                                                transform=transformer,
                                                problem=problem,
                                                train=False,
                                                lang=args.lang),
            batch_size=1,
            shuffle=True)
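
The excerpt stops before training and evaluation. A rough sketch of how the y_true / y_pred arrays declared above could be scored per problem once predictions have been collected (accuracy_score from scikit-learn is an assumption, not part of the original code):

        # Illustrative sketch: score this problem's predictions and keep a
        # running per-problem average (filling y_true / y_pred is omitted here)
        from sklearn.metrics import accuracy_score
        problem_accuracy = accuracy_score(y_true, y_pred)
        single_sample_average = np.append(single_sample_average, problem_accuracy)
        print(u"Problem {}: accuracy {}".format(problem, problem_accuracy))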
Example #3
import torch.utils.data
import dataset
from echotorch.transforms import text
import random
from torch.autograd import Variable

# Experiment parameters
window_size = 500
batch_size = 64
sample_batch = 4
epoch_batches = 10
max_epoch = 1

# Author identification training dataset
pan18loader_training = torch.utils.data.DataLoader(
    dataset.AuthorIdentificationDataset(root="./data/", download=True, transform=text.Character(), problem=1),
    batch_size=1, shuffle=True)

# Author identification test dataset
pan18loader_test = torch.utils.data.DataLoader(
    dataset.AuthorIdentificationDataset(root="./data/", download=True, transform=text.Character(), problem=1,
                                        train=False),
    batch_size=1, shuffle=True)

# Authors
author_to_idx = dict()
for idx, author in enumerate(pan18loader_training.dataset.authors):
    author_to_idx[author] = idx
# end for
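
A minimal sketch of how one training batch from this loader could be turned into input and target tensors using author_to_idx (the exact sample layout is an assumption; Variable matches the import at the top of this example):

# Illustrative: map the author label of one batch to a class-index target
for i, data in enumerate(pan18loader_training):
    inputs, label = data
    inputs, targets = Variable(inputs), Variable(torch.LongTensor([author_to_idx[label[0]]]))
    break
# end for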

Example #4

# Imports (the transforms and CrossValidation below are assumed to come from
# torchlanguage; "dataset" and "tools" are local modules of the original project)
import torch.utils.data
import dataset
import torchlanguage.utils
from torchlanguage import transforms
import tools.functions

# Text length (characters per n-gram)
text_length = 20

# Argument
args = tools.functions.argument_parser_training_model()

# Transforms
transform = transforms.Compose([
    transforms.Character(),
    transforms.ToIndex(start_ix=0),
    transforms.MaxIndex(max_id=83),
    transforms.ToNGram(n=text_length, overlapse=True),
    transforms.Reshape((-1, 20))
])

# Author identification training dataset
dataset_train = dataset.AuthorIdentificationDataset(root="./data/", download=True, transform=transform, problem=1, lang='en')

# Author identification test dataset
dataset_valid = dataset.AuthorIdentificationDataset(root="./data/", download=True, transform=transform, problem=1, train=False, lang='en')

# Cross validation
dataloader_train = torch.utils.data.DataLoader(torchlanguage.utils.CrossValidation(dataset_train), batch_size=1, shuffle=True)
dataloader_valid = torch.utils.data.DataLoader(torchlanguage.utils.CrossValidation(dataset_valid, train=False), batch_size=1, shuffle=True)

# Author to idx
author_to_ix = dict()
for idx, author in enumerate(dataset_train.authors):
    author_to_ix[author] = idx
# end for

# Model
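The excerpt ends where the model would be defined. As an illustrative stand-in (not the model from the original script), here is a minimal character n-gram classifier that fits the pipeline above; the embedding size is arbitrary and the vocabulary size of 84 follows MaxIndex(max_id=83):

import torch.nn as nn


class CharNGramClassifier(nn.Module):
    """Embed character indices, average over the n-gram, classify the author."""

    def __init__(self, n_authors, voc_size=84, embedding_dim=50):
        super(CharNGramClassifier, self).__init__()
        self.embedding = nn.Embedding(voc_size, embedding_dim)
        self.linear = nn.Linear(embedding_dim, n_authors)
    # end __init__

    def forward(self, x):
        # x: (batch, text_length) tensor of character indices
        embedded = self.embedding(x)     # (batch, text_length, embedding_dim)
        averaged = embedded.mean(1)      # (batch, embedding_dim)
        return self.linear(averaged)     # (batch, n_authors)
    # end forward
# end class

model = CharNGramClassifier(n_authors=len(author_to_ix))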