Python AuthorIdentificationDataset 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: dataset

메소드/함수: AuthorIdentificationDataset

hotexamples.com에서의 예제들: 4

Python AuthorIdentificationDataset - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python dataset.AuthorIdentificationDataset의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

# redistribute it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Copyright Nils Schaetti <*****@*****.**>

# Imports
import torch.utils.data
import dataset
from echotorch.transforms import text

# Loader over the author identification dataset (problem 1). Documents go
# through the GloveVector text transform and are served one at a time in
# shuffled order.
pan18loader = torch.utils.data.DataLoader(
    dataset.AuthorIdentificationDataset(
        root="./data/",
        download=True,
        transform=text.GloveVector(),
        problem=1,
    ),
    batch_size=1,
    shuffle=True,
)

# Walk over every sample the loader provides
for i, data in enumerate(pan18loader):
    # Each item unpacks into an (inputs, labels) pair
    inputs, labels = data
# end for

예제 #2

파일 보기

파일: model_bdesn_w_optim.py 프로젝트: pan-webis-de/schaetti18b

    # Generate the matrix W once, before the problem loop, from the reservoir
    # size and sparsity (presumably the ESN's internal weight matrix, shared
    # by every model built below - confirm against the rest of the function).
    w = etnn.ESNCell.generate_w(reservoir_size, w_sparsity)

    # Sample average (starts empty; filled later, outside this excerpt)
    single_sample_average = np.array([])

    # For each problem: np.arange(1, 3) yields problems 1 and 2
    for problem in np.arange(1, 3):
        # Truth and prediction accumulators, reset for every problem
        y_true = np.array([])
        y_pred = np.array([])

        # Author identification training dataset for the current problem and
        # language, served one shuffled sample at a time
        pan18loader_training = torch.utils.data.DataLoader(
            dataset.AuthorIdentificationDataset(root="./data/",
                                                download=True,
                                                transform=transformer,
                                                problem=problem,
                                                lang=args.lang),
            batch_size=1,
            shuffle=True)

        # Author identification test dataset: same settings as the training
        # loader, but built with train=False
        pan18loader_test = torch.utils.data.DataLoader(
            dataset.AuthorIdentificationDataset(root="./data/",
                                                download=True,
                                                transform=transformer,
                                                problem=problem,
                                                train=False,
                                                lang=args.lang),
            batch_size=1,
            shuffle=True)

예제 #3

파일 보기

import torch.utils.data
import dataset
from echotorch.transforms import text
import random
from torch.autograd import Variable

# Experiment hyper-parameters
window_size, batch_size = 500, 64
sample_batch, epoch_batches = 4, 10
max_epoch = 1

# Training split of the author identification dataset (problem 1), with
# documents passed through the Character text transform.
# NOTE: both loaders below use a literal batch_size=1, not the batch_size
# variable defined above.
pan18loader_training = torch.utils.data.DataLoader(
    dataset.AuthorIdentificationDataset(
        root="./data/",
        download=True,
        transform=text.Character(),
        problem=1,
    ),
    batch_size=1,
    shuffle=True,
)

# Test split: same settings, but built with train=False
pan18loader_test = torch.utils.data.DataLoader(
    dataset.AuthorIdentificationDataset(
        root="./data/",
        download=True,
        transform=text.Character(),
        problem=1,
        train=False,
    ),
    batch_size=1,
    shuffle=True,
)

# Map each author of the training dataset to its position in the author list
author_to_idx = {}
for idx, author in enumerate(pan18loader_training.dataset.authors):
    author_to_idx[author] = idx
# end for

# Number of authors

예제 #4

파일 보기

파일: model_bdesn_ccsaa_training.py 프로젝트: nschaetti/PAN18-Authorship-Identification

# Size of the n-grams built by the transform pipeline; also the width of the
# reshaped output, so both stay in sync if this value is changed.
text_length = 20

# Argument
args = tools.functions.argument_parser_training_model()

# Transforms, applied in order: Character -> ToIndex -> MaxIndex -> ToNGram ->
# Reshape to rows of text_length entries.
transform = transforms.Compose([
    transforms.Character(),
    transforms.ToIndex(start_ix=0),
    transforms.MaxIndex(max_id=83),
    transforms.ToNGram(n=text_length, overlapse=True),
    # Row width follows text_length instead of repeating the literal 20
    transforms.Reshape((-1, text_length))
])

# Author identification training dataset (problem 1, English)
dataset_train = dataset.AuthorIdentificationDataset(
    root="./data/",
    download=True,
    transform=transform,
    problem=1,
    lang='en'
)

# Author identification test dataset: same settings, built with train=False
dataset_valid = dataset.AuthorIdentificationDataset(
    root="./data/",
    download=True,
    transform=transform,
    problem=1,
    train=False,
    lang='en'
)

# Cross validation: wrap each dataset in CrossValidation and serve it one
# shuffled sample at a time
dataloader_train = torch.utils.data.DataLoader(
    torchlanguage.utils.CrossValidation(dataset_train),
    batch_size=1,
    shuffle=True
)
dataloader_valid = torch.utils.data.DataLoader(
    torchlanguage.utils.CrossValidation(dataset_valid, train=False),
    batch_size=1,
    shuffle=True
)

# Author to idx: position of each author in the training dataset's author list
author_to_ix = dict()
for idx, author in enumerate(dataset_train.authors):
    author_to_ix[author] = idx
# end for

# Model