Example #1
import torch
from torch import nn
from torch.utils.data import DataLoader
from death.post.inputgen_planD import InputGenD, train_valid_split
# NotMySam, load_model and train are project-specific helpers; their import
# paths are not shown in this snippet.


def main(load=False, lr=1e-3, savestr="", reset=True, palette=False):
    total_epochs = 10
    iter_per_epoch = 100000
    optim = None
    starting_epoch = 0
    starting_iteration = 0
    logfile = "log.txt"

    num_workers = 3
    ig = InputGenD()
    trainds, validds = train_valid_split(ig, split_fold=10)
    traindl = DataLoader(dataset=trainds,
                         batch_size=1,
                         num_workers=num_workers)
    validdl = DataLoader(dataset=validds, batch_size=1)
    print("Using", num_workers, "workers for training set")
    computer = NotMySam(input_size=47764,
                        hidden_size=128,
                        last_output_size=3620,
                        rnn_type='lstm',
                        num_layers=4,
                        nr_cells=100,
                        cell_size=32,
                        read_heads=4,
                        sparse_reads=4,
                        batch_first=True,
                        gpu_id=0)
    # load model:
    if load:
        print("loading model")
        computer, optim, starting_epoch, starting_iteration = load_model(
            computer, optim, starting_epoch, starting_iteration, savestr)

    computer = computer.cuda()
    if optim is None:
        print("Using Adam with lr", lr)
        optimizer = torch.optim.Adam(
            [i for i in computer.parameters() if i.requires_grad], lr=lr)
    else:
        # print('use Adadelta optimizer with learning rate ', lr)
        # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)
        optimizer = optim

    real_criterion = nn.SmoothL1Loss()
    # size_average=False sums the loss instead of averaging it (reduction='sum' in newer PyTorch)
    binary_criterion = nn.BCEWithLogitsLoss(size_average=False)

    # starting with the epoch after the loaded one

    train(computer, optimizer, real_criterion, binary_criterion, traindl,
          iter(validdl), int(starting_epoch), total_epochs,
          int(starting_iteration), iter_per_epoch, savestr, logfile)
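
Examples #1 through #4 resume training through a project-specific load_model helper whose implementation is not shown on this page. The following is only a minimal sketch of what such a resume function could look like, assuming checkpoints are dictionaries written with torch.save into a saves/ directory and named after savestr; the directory layout, file naming, and checkpoint keys are all assumptions for illustration, not the repository's actual scheme.

import os
import torch

def load_model(computer, optim, starting_epoch, starting_iteration, savestr):
    # hypothetical layout: checkpoints live in saves/ and start with savestr
    save_dir = "saves"
    candidates = [f for f in os.listdir(save_dir)
                  if f.startswith(savestr) and f.endswith(".pkl")]
    if not candidates:
        # nothing to resume from; hand the arguments back unchanged
        return computer, optim, starting_epoch, starting_iteration
    newest = max(candidates,
                 key=lambda f: os.path.getmtime(os.path.join(save_dir, f)))
    checkpoint = torch.load(os.path.join(save_dir, newest))
    computer.load_state_dict(checkpoint["model"])
    optim = checkpoint.get("optimizer", optim)
    return (computer, optim,
            checkpoint.get("epoch", starting_epoch),
            checkpoint.get("iteration", starting_iteration))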
Example #2
def main(load=False, lr=1e-4, savestr="6"):
    total_epochs = 10
    iter_per_epoch = 10000
    optim = None
    starting_epoch = 0
    starting_iteration = 0
    logfile = "smalltacolog.txt"

    num_workers = 32
    ig = InputGenD()
    # multiprocessing disabled, because the socket requests seem unstable.
    # performance should not be too bad?
    trainds, validds = train_valid_split(ig, split_fold=10)
    traindl = DataLoader(dataset=trainds,
                         batch_size=32,
                         num_workers=num_workers,
                         collate_fn=pad_collate)
    validdl = DataLoader(dataset=validds,
                         batch_size=8,
                         num_workers=4,
                         collate_fn=pad_collate)
    print("Using", num_workers, "workers for training set")
    computer = Tacotron()

    # load model:
    if load:
        print("loading model")
        computer, optim, starting_epoch, starting_iteration = load_model(
            computer, optim, starting_epoch, starting_iteration, savestr)

    computer = computer.cuda()
    if optim is None:
        print("Using Adam with lr", lr)
        optimizer = torch.optim.Adam(
            [i for i in computer.parameters() if i.requires_grad], lr=lr)
    else:
        # print('use Adadelta optimizer with learning rate ', lr)
        # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)
        optimizer = optim

    real_criterion = nn.SmoothL1Loss()
    # time-wise sum, label-wise average.
    binary_criterion = nn.BCEWithLogitsLoss()

    # starting with the epoch after the loaded one

    train(computer, optimizer, real_criterion, binary_criterion,
          traindl, validdl, int(starting_epoch), total_epochs,
          int(starting_iteration), iter_per_epoch, savestr, logfile)
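
Example #2 hands a pad_collate function to its DataLoaders so that variable-length sequences can be batched together. The repository's actual collate function is not shown here; the sketch below is a minimal version under the assumption that each dataset item is an (input, target) pair of tensors whose first dimension is time.

import torch
from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch):
    # batch is a list of (input, target) pairs with unequal time lengths
    inputs, targets = zip(*batch)
    lengths = torch.tensor([seq.shape[0] for seq in inputs])
    # pad along the time dimension to the longest sequence in the batch
    padded_inputs = pad_sequence(list(inputs), batch_first=True)
    padded_targets = pad_sequence(list(targets), batch_first=True)
    return padded_inputs, padded_targets, lengths

Returning the original lengths alongside the padded tensors lets the training loop mask the padded time steps when computing the loss.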
Example #3
def main():
    total_epochs = 10
    iter_per_epoch = 100000
    lr = 1e-5
    target_dim = 3656
    logfile = "log.txt"

    num_workers = 3
    ig = InputGenD()
    # multiprocessing disabled, because the socket requests seem unstable.
    # performance should not be too bad?
    trainds, validds = train_valid_split(ig, split_fold=10)
    traindl = DataLoader(dataset=trainds,
                         batch_size=1,
                         num_workers=num_workers)
    validdl = DataLoader(dataset=validds, batch_size=1)
    print("Using", num_workers, "workers for training set")

    computer = DNC()
    computer.train()

    # load model (this variant always loads a saved model):
    if True:
        print("loading model")
        computer, optim, starting_epoch, starting_iteration = load_model(
            computer)

    computer = computer.cuda()
    if optim is None:
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                            computer.parameters()),
                                     lr=lr)
    else:
        # print('use Adadelta optimizer with learning rate ', lr)
        # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)
        optimizer = optim

    real_criterion = nn.SmoothL1Loss()
    binary_criterion = nn.BCEWithLogitsLoss(size_average=False)

    # starting with the epoch after the loaded one

    train(computer, optimizer, real_criterion, binary_criterion, traindl,
          iter(validdl), int(starting_epoch), total_epochs,
          int(starting_iteration), iter_per_epoch, target_dim, logfile)
Example #4
def main(load=False, lr=1e-3):
    total_epochs = 10
    iter_per_epoch = 100000
    optim = None
    starting_epoch = 0
    starting_iteration = 0
    logfile = "log.txt"

    num_workers = 3
    ig = InputGenD()
    trainds, validds = train_valid_split(ig, split_fold=10)
    traindl = DataLoader(dataset=trainds,
                         batch_size=1,
                         num_workers=num_workers)
    validdl = DataLoader(dataset=validds, batch_size=1)
    print("Using", num_workers, "workers for training set")
    computer = DNC()

    # load model:
    if load:
        print("loading model")
        computer, optim, starting_epoch, starting_iteration = load_model(
            computer, optim, starting_epoch, starting_iteration)

    computer = computer.cuda()
    if optim is None:
        print("Using Adam with lr", lr)
        optimizer = torch.optim.Adam(
            [i for i in computer.parameters() if i.requires_grad], lr=lr)
    else:
        # print('use Adadelta optimizer with learning rate ', lr)
        # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)
        optimizer = optim

    real_criterion = nn.SmoothL1Loss()
    binary_criterion = nn.BCEWithLogitsLoss(size_average=False)

    # starting with the epoch after the loaded one

    train(computer, optimizer, real_criterion, binary_criterion, traindl,
          iter(validdl), int(starting_epoch), total_epochs,
          int(starting_iteration), iter_per_epoch, logfile)
Example #5
# This file contains all the statistical models for the baselines.
# I chose to do all of these in Python, not R, because the data is finalized in Python, not R.
# We could import the data from Python into R, but that seems unreasonable.

from death.post.inputgen_planD import InputGenD, train_valid_split
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Logistic regression.
# The targets are a bunch of binary values.

ig = InputGenD()
trainds, validds = train_valid_split(ig, split_fold=10)

lr = LogisticRegression(random_state=0, multi_class="ovr")

# X is an ndarray with shape (150, 4)
# y is an ndarray with shape (150,)
X, y = load_iris(return_X_y=True)
# lrfit=lr.fit(X,y)

print("end")
Example #6
### To see the pandas data frames
# Note that DFManager allows you to load from pickle files or from the raw csv files.
# Pickle loading is much faster; loading the raw files rebuilds the pickles.
#
from death.post.dfmanager import DFManager
dfs = DFManager()
dfs.load_pickle(verbose=True)
# from here, you can access all data frames as properties of dfs
# for example, this is the demographics csv:
print(dfs.demo)

# if you want to load the raw csv files and rebuild the pickle files, run:
dfs.load_raw(save=True)
# build the dictionaries needed for the one-hot encodings
dfs.make_dictionary(verbose=True, save=True, skip=False)


### To see the inputs and outputs used by the deep learning model
from death.post.inputgen_planD import InputGenD, train_valid_split
ig = InputGenD(verbose=False)
# split into a training set and a validation set if you want;
# it's fine if you skip this step
train, valid = train_valid_split(ig)
# the __getitem__() method is how you should access this dataset
print(train[123])


### Loading into PyTorch is trickier, because the sequences do not have even lengths.
# I have two solutions, one with ChannelManager and one with padded sequences;
# see the scripts for BatchDNC and Tacotron respectively. A padded-sequence sketch follows below.
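
For the padded-sequence route, a common way to consume padded batches in PyTorch is to pack them before the recurrent layer so the padding does not pollute the hidden state. This is only a minimal sketch, assuming a pad_collate-style loader that yields (padded_inputs, padded_targets, lengths) and an LSTM size chosen purely for illustration; it is not the BatchDNC or Tacotron code itself.

import torch
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

lstm = nn.LSTM(input_size=64, hidden_size=128, batch_first=True)

def run_lstm(padded_inputs, lengths):
    # pack so the LSTM skips the padded time steps
    packed = pack_padded_sequence(padded_inputs, lengths.cpu(),
                                  batch_first=True, enforce_sorted=False)
    packed_out, _ = lstm(packed)
    # unpack back to a padded (batch, max_len, hidden_size) tensor
    out, out_lengths = pad_packed_sequence(packed_out, batch_first=True)
    return out, out_lengths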