Example 1
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Copyright Nils Schaetti <*****@*****.**>

# Imports
import torch.utils.data
import dataset
from echotorch.transforms import text

# Author identification dataset
pan18loader = torch.utils.data.DataLoader(dataset.AuthorIdentificationDataset(
    root="./data/", download=True, transform=text.GloveVector(), problem=1),
                                          batch_size=1,
                                          shuffle=True)

# Get training data for this fold
for i, data in enumerate(pan18loader):
    # Inputs and labels
    inputs, labels = data
# end for
Example 2

# Imports
import torch.utils.data
from echotorch import datasets
from echotorch.transforms import text

# SFGram dataset
reutersloader = torch.utils.data.DataLoader(datasets.SFGramDataset(
    tokenizer=text.Token(),
    root="../../data/sfgram/",
    download=True,
    transform=text.GloveVector()),
                                            batch_size=1,
                                            shuffle=True)

# Get training data for this fold
for i, data in enumerate(reutersloader):
    # Inputs and labels
    inputs, labels = data
# end for
Example 3
# Imports
import argparse
import numpy as np
import torch
import torch.utils.data
import torch.nn as nn
from echotorch import datasets
from echotorch.transforms import text

# Argument parser
parser = argparse.ArgumentParser()
parser.add_argument("--n-features",  # flag name assumed; not shown in the original snippet
                    type=int,
                    help="Number of features",
                    default=10)
parser.add_argument("--fold", type=int, help="Starting fold", default=0)
parser.add_argument("--steps", type=int, help="Steps", default=1)
parser.add_argument("--no-cuda",
                    action='store_true',
                    default=False,
                    help="Enables CUDA training")
args = parser.parse_args()

# Use CUDA?
args.cuda = not args.no_cuda and torch.cuda.is_available()
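# (Sketch, not in the original snippet: this flag would typically be consumed
#  later with `model.cuda()` and `inputs = inputs.cuda()` when args.cuda is True.)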

# Word embedding
transform = text.GloveVector(model='en_vectors_web_lg')

# Reuters C50 dataset
reutersloader = torch.utils.data.DataLoader(datasets.ReutersC50Dataset(
    download=True, n_authors=15, transform=transform),
                                            batch_size=1,
                                            shuffle=False)

# Loss function
# loss_function = nn.NLLLoss()
loss_function = nn.CrossEntropyLoss()

# 10-CV
for k in np.arange(args.fold, 10):
    # Model
    # model = CNNFeatureSelector(embedding_dim=embedding_dim, n_authors=n_authors)
    pass  # the rest of the training loop is not shown in the original snippet
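As a side note on the loss function above: nn.CrossEntropyLoss applies log-softmax internally and expects raw scores, whereas the commented-out nn.NLLLoss expects log-probabilities. A small self-contained sketch of the equivalence (the 15-class size mirrors the n_authors=15 setting above; everything else is illustrative):

import torch
import torch.nn as nn

logits = torch.randn(4, 15)            # raw scores for 4 samples, 15 authors
targets = torch.randint(0, 15, (4,))   # gold author indices
ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)
print(torch.allclose(ce, nll))         # prints True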
Example 4
import torch.utils.data
import dataset
from echotorch.transforms import text

# Experiment parameters
batch_size = 64
n_epoch = 1
window_size = 700
training_set_size = 10
test_set_size = 2
training_samples = training_set_size + test_set_size
stride = 100

# Style change detection dataset, training set
pan18loader_train = torch.utils.data.DataLoader(dataset.SCDSimpleDataset(
    root='./data/', download=True, transform=text.GloveVector(), train=True),
                                                batch_size=1)

# Style change detection dataset, validation set
pan18loader_valid = torch.utils.data.DataLoader(dataset.SCDSimpleDataset(
    root='./data/', download=True, transform=text.GloveVector(), train=False),
                                                batch_size=1)

# Get training data
for i, data in enumerate(pan18loader_train):
    # Inputs and label
    inputs, label = data

    # TRAINING
# end for
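A sketch, not part of the original example, of how the window_size and stride parameters defined above would typically be used to cut each document into fixed-length training windows; the (1, time, embedding_dim) input shape is an assumption:

for i, data in enumerate(pan18loader_train):
    inputs, label = data
    # Slide a window of window_size time steps, moving forward by stride
    for start in range(0, inputs.size(1) - window_size + 1, stride):
        window = inputs[:, start:start + window_size]
        # the (1, window_size, embedding_dim) window would be fed to the model with `label`
    # end for
# end for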
Example 5
for space in param_space:
    # Params
    reservoir_size = int(space['reservoir_size'])
    w_sparsity = space['w_sparsity']
    leak_rate = space['leak_rate']
    input_scaling = space['input_scaling']
    input_sparsity = space['input_sparsity']
    spectral_radius = space['spectral_radius']
    transformer = space['transformer'][0][0]
    aggregation = space['aggregation'][0][0]
    state_gram = space['state_gram']
    dataset_start = int(space['dataset_start'])

    # Choose the right transformer
    if "wv" in transformer:
        reutersloader.dataset.transform = text.GloveVector()
    elif "pos" in transformer:
        reutersloader.dataset.transform = text.PartOfSpeech()
    elif "tag" in transformer:
        reutersloader.dataset.transform = text.Tag()
    elif "character" in transformer:
        reutersloader.dataset.transform = text.Character()
    elif "fw" in transformer:
        reutersloader.dataset.transform = text.FunctionWord()
    else:
        print(u"No transformer set!")
        exit()
    # end if

    # Dataset start
    reutersloader.dataset.set_start(dataset_start)
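For reference, the transformer selection above can also be written as a dict-based dispatch over the same transforms; a sketch with the same behaviour, assuming the same `transformer` string and `reutersloader` as in the snippet:

# Map substrings of the transformer name to the corresponding transform class
transformer_map = {
    "wv": text.GloveVector,
    "pos": text.PartOfSpeech,
    "tag": text.Tag,
    "character": text.Character,
    "fw": text.FunctionWord,
}
chosen = next((cls for key, cls in transformer_map.items() if key in transformer), None)
if chosen is None:
    print(u"No transformer set!")
    exit()
# end if
reutersloader.dataset.transform = chosen()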