Example #1
    def _set_dataset(self):
        if self._dataset is not None:
            return

        self._seed()

        self._dataset = Dataset(
            filename=self.args.datafile,
            folder=self.args.dataroot,
            transformer=self.importer["transformer"],
            normalize=self.args.normalize,
        )

        if self.args.verbose:
            print("dataset loaded, {} classes in total".format(
                self._dataset.num_classes))
            print("train_shape = {}, test_shape = {}".format(
                self._dataset.train.X.shape, self._dataset.test.X.shape))

        self._dataset.filter(labels=self.args.labels)
        if self.args.balance:
            self._dataset.balance()
        self._dataset.sample(train_size=self.args.size,
                             test_size=self.args.size)

        if self.args.verbose:
            print("dataset downsampled, {} classes in total".format(
                self._dataset.num_classes))
            print("train_shape = {}, test_shape = {}".format(
                self._dataset.train.X.shape, self._dataset.test.X.shape))
Example #2
def main(fpath):
    ENC_EMB_DIM = 256
    DEC_EMB_DIM = 256
    ENC_HID_DIM = 512
    DEC_HID_DIM = 512
    ENC_DROPOUT = 0.5
    DEC_DROPOUT = 0.5

    device = torch.device('cuda')
    dataset = Dataset()
    INPUT_DIM = len(dataset.SRC.vocab)
    OUTPUT_DIM = len(dataset.TRG.vocab)
    SRC_PAD_IDX = dataset.SRC.vocab.stoi[dataset.SRC.pad_token]

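    # assemble the attention-based encoder-decoder (seq2seq) model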
    encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                      ENC_DROPOUT)
    attention = Attention(ENC_HID_DIM, DEC_HID_DIM)
    decoder = Decoder(DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, OUTPUT_DIM,
                      DEC_DROPOUT, attention)
    model = Seq2Seq(encoder, decoder, SRC_PAD_IDX, device)
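    # load the weights of the best checkpoint saved during training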
    model.load_state_dict(torch.load("best_model.pt"))
    model.to(device)
    with open(fpath, "r") as f:
        sentences = f.readlines()

    translate_sentence(model, sentences, dataset.SRC, dataset.TRG, device)
Example #3
    def test_build_search_session(self):
        importer = ReflexiveImporter("neural_net_adam")
        dataset = Dataset(folder="../dataset")
        self.session = SearchSession(importer.model,
                                     importer.param_dist,
                                     dataset,
                                     n_iter=1,
                                     cv=3)
Example #4
    def __init__(self):
        self.val_inc_set = parameters.full_val_inc_set
        self.net_income_dict = None
        self.count_dict = None
        self.val_inc_count = None
        self.dataset = Dataset()
        self.initial_price = None
        self.return_ratio_dict = None
        self.full_inc_set = dict()
        self.full_count_dict = dict()
Example #5
def main():
    BATCH_SIZE = 32
    NUM_EPOCH = 12
    LR = 0.001
    CLIP = 1
    STEP_SIZE = 4
    GAMMA = 0.1
    ENC_EMB_DIM = 256
    DEC_EMB_DIM = 256
    ENC_HID_DIM = 512
    DEC_HID_DIM = 512
    ENC_DROPOUT = 0.5
    DEC_DROPOUT = 0.5

    device = torch.device('cuda')

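    # build the dataset and bucketed iterators (batches sorted by source length)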
    dataset = Dataset()
    train_data, valid_data, test_data = dataset.build_dataset()
    train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
        (train_data, valid_data, test_data),
        batch_size=BATCH_SIZE,
        sort_within_batch=True,
        sort_key=lambda x: len(x.src),
        device=device)

    INPUT_DIM = len(dataset.SRC.vocab)
    OUTPUT_DIM = len(dataset.TRG.vocab)
    SRC_PAD_IDX = dataset.SRC.vocab.stoi[dataset.SRC.pad_token]
    TRG_PAD_IDX = dataset.TRG.vocab.stoi[dataset.TRG.pad_token]

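    # assemble the encoder, attention, and decoder into the seq2seq model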
    encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                      ENC_DROPOUT)
    attention = Attention(ENC_HID_DIM, DEC_HID_DIM)
    decoder = Decoder(DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, OUTPUT_DIM,
                      DEC_DROPOUT, attention)
    model = Seq2Seq(encoder, decoder, SRC_PAD_IDX, device)
    model.apply(init_weight)
    model.to(device)
    optimizer = Adam(model.parameters(), lr=LR)
    criterion = CrossEntropyLoss(ignore_index=TRG_PAD_IDX).to(device)
    scheduler = StepLR(optimizer, STEP_SIZE, GAMMA)

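    # keep the checkpoint with the lowest validation loss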
    min_valid_loss = 1e10

    for e in range(NUM_EPOCH):
        print("Epoch: {}".format(e + 1))
        train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
        print("Train loss: {}".format(train_loss))
        valid_loss = evaluate(model, valid_iterator, criterion)
        print("Valid loss: {}".format(valid_loss))

        if valid_loss < min_valid_loss:
            torch.save(model.state_dict(), "best_model.pt")
            min_valid_loss = valid_loss
Example #6
def main():
    """Load data, train network, visualize results."""
    data_dir = 'data/'
    trainset = loadmat(data_dir + 'train_32x32.mat')
    testset = loadmat(data_dir + 'test_32x32.mat')
    dataset = Dataset(trainset, testset)

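    # build the DCGAN, train it, and keep the losses and generated samples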
    tf.reset_default_graph()
    dcgan = DCGAN(dataset)

    losses, samples = dcgan.train()

    # samples, losses = dcgan.load_pickle_data()

    dcgan.view_samples(-1, samples)
    dcgan.visualize_loss(losses)
Example #7
def train_and_evaluate(model, epochs, batches, gpus=[], dual=False, plot_history=False, plot_model=False):
    import keras, tensorflow as tf
    from keras import utils

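    # limit TensorFlow to the requested GPUs and enable GPU memory growth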
    if len(gpus) > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpus)

        config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
        sess = tf.Session(config=config)
        keras.backend.set_session(sess)
        keras.backend.get_session().run(tf.global_variables_initializer())

    if plot_model:
        if dual:
            utils.plot_model(model, to_file='dual_model.png', show_shapes=True)
        else:
            utils.plot_model(model, to_file='single_model.png', show_shapes=True)

    fetcher = DataFetcher()
    current_epochs = 0
    history = None

    if dual:
        data_type = 'split'
    else:
        data_type = 'stack'

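    # keep fetching fresh samples and training in EPOCHS_BATCH-sized chunks until the epoch budget is reached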
    for samples in fetcher.fetch_inf(type=data_type):
        if current_epochs >= epochs:
            break

        if dual:
            (x_train1, x_train2, y_train), (x_test1, x_test2, y_test) = samples

            history = model.fit(
                [x_train1, x_train2], y_train,
                batch_size=batches,
                epochs=EPOCHS_BATCH + current_epochs,
                initial_epoch=current_epochs,
                verbose=1,
                validation_data=([x_test1, x_test2], y_test),
            )
            model.save(DUAL_MODEL_NAME)
        else:
            (x_train, y_train), (x_test, y_test) = samples

            history = model.fit(
                x_train, y_train,
                batch_size=batches,
                epochs=EPOCHS_BATCH + current_epochs,
                initial_epoch=current_epochs,
                verbose=1,
                validation_data=(x_test, y_test),
            )
            model.save(SINGLE_MODEL_NAME)

        current_epochs += EPOCHS_BATCH

    if plot_history:
        import matplotlib.pyplot as plt

        # Plot training & validation accuracy values
        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('Model accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Test'], loc='upper left')
        plt.show()

        # Plot training & validation loss values
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Test'], loc='upper left')
        plt.show()

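    # final evaluation on a freshly loaded dataset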
    dataset = Dataset()
    dataset.load(number=0)

    if dual:
        (x_train1, x_train2, y_train), (x_test1, x_test2, y_test) = dataset.data(type='split')
        score = model.evaluate([x_test1, x_test2], y_test, verbose=0)
        model.save(DUAL_MODEL_NAME)
    else:
        (x_train, y_train), (x_test, y_test) = dataset.data(type='stack')
        score = model.evaluate(x_test, y_test, verbose=0)
        model.save(SINGLE_MODEL_NAME)

    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
Example #8
    BATCH = 16
    START_LR = 1e-3
    STOP_LR = 1e-4
    DECAY_OVER = 400000


args.parse_args()

with open(args.CONFIG, "r") as config:
    config = yaml.safe_load(config)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = DDSP(**config["model"]).to(device)

dataset = Dataset(config["preprocess"]["out_dir"])

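# the positional arguments are batch_size=args.BATCH and shuffle=True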
dataloader = torch.utils.data.DataLoader(
    dataset,
    args.BATCH,
    True,
    drop_last=True,
)

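# store dataset-wide loudness statistics in the config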
mean_loudness, std_loudness = mean_std_loudness(dataloader)
config["data"]["mean_loudness"] = mean_loudness
config["data"]["std_loudness"] = std_loudness

writer = SummaryWriter(path.join(args.ROOT, args.NAME), flush_secs=20)

with open(path.join(args.ROOT, args.NAME, "config.yaml"), "w") as out_config:
    yaml.safe_dump(config, out_config)  # write the updated config next to the run logs
Example #9
def main(args):
    # set up logs and device
    args.save_dir = get_save_dir(args.save_dir, args.name)
    log = get_logger(args.save_dir, args.name)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')

    # set random seed
    log.info(f'Using random seed {args.seed}...')
    set_seeds(args.seed)

    # create dataset using torchtext
    log.info(f'Build data fields and {args.bert_variant} tokenizer...')
    dataset = Dataset(args.bert_variant)
    TEXT, LABEL = dataset.get_fields()

    # train:valid:test = 17500:7500:25000
    log.info('Build IMDb dataset using torchtext.datasets...')
    train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
    train_data, valid_data = train_data.split(
        random_state=random.seed(args.seed))

    # iterators
    train_iterator, valid_iterator, test_iterator = dataset.get_iterators(
        train_data, valid_data, test_data, args.batch_size, device)

    # build LABEL vocabulary
    LABEL.build_vocab(train_data)

    # define model
    log.info('Building model...')
    model = BERTSentiment(args.bert_variant, args.hidden_dim, args.output_dim,
                          args.n_layers, args.bidirectional, args.dropout)

    # optimizer
    optimizer = optim.Adam(model.parameters())

    # criterion
    criterion = nn.BCEWithLogitsLoss()

    # place model and criterion on device
    model = model.to(device)
    criterion = criterion.to(device)

    # train set and validation set
    best_valid_loss = float('inf')
    for epoch in range(args.num_epochs):

        start_time = time.time()

        log.info(f'Training, epoch = {epoch}...')
        train_loss, train_acc = train(model, train_iterator, optimizer,
                                      criterion)

        log.info(f'Evaluating, epoch = {epoch}...')
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            log.info(f'Saving best model...')
            best_valid_loss = valid_loss
            torch.save(model.state_dict(),
                       f'{args.save_dir}/{args.model_name}')

    log.info('Model trained and evaluated...')

    # test set
    log.info('Testing...')
    model.load_state_dict(torch.load(f'{args.save_dir}/{args.model_name}'))
    test_loss, test_acc = evaluate(model, test_iterator, criterion)
    print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
Example #10
    def __init__(self):
        self.dataset = Dataset()
        self.dataset.get_data()
        self.volatility = None
Example #11
import os
from preprocess import Dataset
from global_utils import dump, JsonMetricQueueWriter
from .search_session import SearchSession
from .sklearn_args import SklearnSessionParser, SklearnSessionArgs
from reflexive_import import ReflexiveImporter

if __name__ == '__main__':
    parser = SklearnSessionParser()
    args = SklearnSessionArgs(parser)

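    # load, filter, optionally balance, and downsample the dataset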
    dataset = Dataset(args.datafile, args.dataroot)
    dataset.filter(args.labels)
    if args.balance:
        dataset.balance()
    dataset.sample(args.size)

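    # dynamically import the model and its hyper-parameter search distribution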
    importer = ReflexiveImporter(module_name=args.model,
                                 var_list=["model", "parameter_distribution"],
                                 alias_list=["model", "param"])
    session = SearchSession(importer["model"], importer["param"], dataset,
                            args.n_iter, args.cv)
    session.report_args()

    # tune (search for) hyper-parameters
    session.fit()
    session.report_best()
    session.report_result()
    dump(session.search_results, os.path.join(args.output,
                                              "search-results.pkl"))
Example #12
    def setUp(self):
        self.dataset = Dataset(folder="../dataset")
        self.n_train = len(self.dataset.train)
        self.n_test = len(self.dataset.test)
Example #13
    def __init__(self):
        self.dataset = Dataset()
        self.dataset.get_data()