예제 #1
0
def main(args):
    """Evaluate a trained FaSNet / TAC model on Task 1 and save metrics.

    Loads pickled predictors/targets, runs the model over the dataset one
    example at a time, keeps running means of the Task 1 metric, WER and
    STOI, optionally writes enhanced sounds to disk, and finally saves the
    metrics dict as JSON under ``args.results_path``.
    """
    import json  # local import: only needed for the final metrics dump

    if args.use_cuda:
        device = 'cuda:' + str(args.gpu_id)
    else:
        device = 'cpu'

    print('\nLoading dataset')
    #LOAD DATASET
    with open(args.predictors_path, 'rb') as f:
        predictors = pickle.load(f)
    with open(args.target_path, 'rb') as f:
        target = pickle.load(f)
    predictors = np.array(predictors)
    target = np.array(target)

    print('\nShapes:')
    print('Predictors: ', predictors.shape)

    #convert to tensor
    predictors = torch.tensor(predictors).float()
    target = torch.tensor(target).float()
    #build dataset from tensors
    dataset_ = utils.TensorDataset(predictors, target)
    #build data loader from dataset (batch size 1: one example at a time)
    dataloader = utils.DataLoader(dataset_, 1, shuffle=False, pin_memory=True)

    if not os.path.exists(args.results_path):
        os.makedirs(args.results_path)

    #LOAD MODEL
    if args.architecture == 'fasnet':
        model = FaSNet_origin(enc_dim=args.enc_dim,
                              feature_dim=args.feature_dim,
                              hidden_dim=args.hidden_dim,
                              layer=args.layer,
                              segment_size=args.segment_size,
                              nspk=args.nspk,
                              win_len=args.win_len,
                              context_len=args.context_len,
                              sr=args.sr)
    elif args.architecture == 'tac':
        model = FaSNet_TAC(enc_dim=args.enc_dim,
                           feature_dim=args.feature_dim,
                           hidden_dim=args.hidden_dim,
                           layer=args.layer,
                           segment_size=args.segment_size,
                           nspk=args.nspk,
                           win_len=args.win_len,
                           context_len=args.context_len,
                           sr=args.sr)
    if args.use_cuda:
        print("Moving model to gpu")
    model = model.to(device)

    #load checkpoint (optimizer slot unused at eval time, hence None)
    state = load_model(model, None, args.model_path, args.use_cuda)

    #COMPUTING METRICS
    print("COMPUTING TASK 1 METRICS")
    print('M: Final Task 1 metric')
    print('W: Word Error Rate')
    print('S: Stoi')

    WER = 0.
    STOI = 0.
    METRIC = 0.
    count = 0
    model.eval()
    with tqdm(total=len(dataloader) // 1) as pbar, torch.no_grad():
        for example_num, (x, target) in enumerate(dataloader):

            outputs = enhance_sound(x, model, device, args.segment_length,
                                    args.segment_overlap)

            outputs = np.squeeze(outputs)
            target = np.squeeze(target)

            # NOTE(review): assumes the peak sample is positive -- confirm.
            outputs = outputs / np.max(outputs) * 0.9  #normalize prediction
            metric, wer, stoi = task1_metric(target, outputs)

            if metric is not None:

                # Incremental (running) mean update for each metric.
                METRIC += (1. / float(example_num + 1)) * (metric - METRIC)
                WER += (1. / float(example_num + 1)) * (wer - WER)
                STOI += (1. / float(example_num + 1)) * (stoi - STOI)

                #save sounds
                if args.save_sounds_freq is not None:
                    sounds_dir = os.path.join(args.results_path, 'sounds')
                    if not os.path.exists(sounds_dir):
                        os.makedirs(sounds_dir)

                    if count % args.save_sounds_freq == 0:
                        sf.write(
                            os.path.join(sounds_dir,
                                         str(example_num) + '.wav'), outputs,
                            16000, 'PCM_16')
                        print('metric: ', metric, 'wer: ', wer, 'stoi: ', stoi)
            else:
                print('No voice activity on this frame')
            pbar.set_description('M:' + str(np.round(METRIC, decimals=3)) +
                                 ', W:' + str(np.round(WER, decimals=3)) +
                                 ', S: ' + str(np.round(STOI, decimals=3)))
            pbar.update(1)
            count += 1

    #visualize and save results
    results = {'word error rate': WER, 'stoi': STOI, 'task 1 metric': METRIC}

    print('RESULTS')
    for i in results:
        print(i, results[i])
    out_path = os.path.join(args.results_path, 'task1_metrics_dict.json')
    # BUG FIX: np.save() would pickle the dict and silently append '.npy'
    # to the filename; write real JSON to match the '.json' extension.
    with open(out_path, 'w') as f:
        json.dump(results, f, indent=2)
    '''
예제 #2
0
    data_get = torch.zeros(data.shape[0], size, size)
    data = data.cpu()
    for i in range(data.shape[0]):
        # print(i)
        temp = unloader(data[i])
        temp = crop(temp)
        temp = loader(temp)
        data_get[i] = temp
    data_get = torch.unsqueeze(data_get, dim=1).type(torch.FloatTensor).cuda()
    return data_get


batch_size = 64

# Combine the training features and labels into one dataset.
dataset = Data.TensorDataset(train_x, train_y)

# Wrap the dataset in a DataLoader.
data_iter = Data.DataLoader(
    dataset=dataset,  # torch TensorDataset format
    batch_size=batch_size,  # mini batch size
    shuffle=True,  # whether to shuffle the data (shuffling is better)
    num_workers=0,  # worker processes for loading; workers require the if __name__ == '__main__': guard
)
# num_workers=0 means no extra worker processes are used to speed up loading

testset = Data.TensorDataset(test_x, test_y)
test_iter = Data.DataLoader(
    dataset=testset,  # torch TensorDataset format
    batch_size=batch_size,  # mini batch size
    shuffle=False,  # whether to shuffle the data
예제 #3
0
    def train(
        self,
        training_batch_size: int = 50,
        learning_rate: float = 5e-4,
        validation_fraction: float = 0.1,
        stop_after_epochs: int = 20,
        max_num_epochs: Optional[int] = None,
        clip_max_norm: Optional[float] = 5.0,
        calibration_kernel: Optional[Callable] = None,
        exclude_invalid_x: bool = True,
        resume_training: bool = False,
        discard_prior_samples: bool = False,
        retrain_from_scratch_each_round: bool = False,
        show_train_summary: bool = False,
        dataloader_kwargs: Optional[dict] = None,
    ) -> DirectPosterior:
        r"""
        Return density estimator that approximates the distribution $p(\theta|x)$.

        Args:
            training_batch_size: Training batch size.
            learning_rate: Learning rate for Adam optimizer.
            validation_fraction: The fraction of data to use for validation.
            stop_after_epochs: The number of epochs to wait for improvement on the
                validation set before terminating training.
            max_num_epochs: Maximum number of epochs to run. If reached, we stop
                training even when the validation loss is still decreasing. If None, we
                train until validation loss increases (see also `stop_after_epochs`).
            clip_max_norm: Value at which to clip the total gradient norm in order to
                prevent exploding gradients. Use None for no clipping.
            calibration_kernel: A function to calibrate the loss with respect to the
                simulations `x`. See Lueckmann, Gonçalves et al., NeurIPS 2017.
            exclude_invalid_x: Whether to exclude simulation outputs `x=NaN` or `x=±∞`
                during training. Expect errors, silent or explicit, when `False`.
            resume_training: Can be used in case training time is limited, e.g. on a
                cluster. If `True`, the split between train and validation set, the
                optimizer, the number of epochs, and the best validation log-prob will
                be restored from the last time `.train()` was called.
            discard_prior_samples: Whether to discard samples simulated in round 1, i.e.
                from the prior. Training may be sped up by ignoring such less targeted
                samples.
            retrain_from_scratch_each_round: Whether to retrain the conditional density
                estimator for the posterior from scratch each round.
            show_train_summary: Whether to print the number of epochs and validation
                loss after the training.
            dataloader_kwargs: Additional or updated kwargs to be passed to the training
                and validation dataloaders (like, e.g., a collate_fn)

        Returns:
            Density estimator that approximates the distribution $p(\theta|x)$.
        """

        # Calibration kernels proposed in Lueckmann, Gonçalves et al., 2017.
        if calibration_kernel is None:
            calibration_kernel = lambda x: ones([len(x)], device=self._device)

        max_num_epochs = 2**31 - 1 if max_num_epochs is None else max_num_epochs

        # Starting index for the training set (1 = discard round-0 samples).
        start_idx = int(discard_prior_samples and self._round > 0)

        # For non-atomic loss, we can not reuse samples from previous rounds as of now.
        # SNPE-A can, by construction of the algorithm, only use samples from the last
        # round. SNPE-A is the only algorithm that has an attribute `_ran_final_round`,
        # so this is how we check for whether or not we are using SNPE-A.
        if self.use_non_atomic_loss or hasattr(self, "_ran_final_round"):
            start_idx = self._round

        theta, x, prior_masks = self.get_simulations(start_idx,
                                                     exclude_invalid_x,
                                                     warn_on_invalid=True)

        # Dataset is shared for training and validation loaders.
        dataset = data.TensorDataset(
            theta,
            x,
            prior_masks,
        )

        # Set the proposal to the last proposal that was passed by the user. For
        # atomic SNPE, it does not matter what the proposal is. For non-atomic
        # SNPE, we only use the latest data that was passed, i.e. the one from the
        # last proposal.
        proposal = self._proposal_roundwise[-1]

        train_loader, val_loader = self.get_dataloaders(
            dataset,
            training_batch_size,
            validation_fraction,
            resume_training,
            dataloader_kwargs=dataloader_kwargs,
        )

        # First round or if retraining from scratch:
        # Call the `self._build_neural_net` with the rounds' thetas and xs as
        # arguments, which will build the neural network.
        # This is passed into NeuralPosterior, to create a neural posterior which
        # can `sample()` and `log_prob()`. The network is accessible via `.net`.
        if self._neural_net is None or retrain_from_scratch_each_round:
            self._neural_net = self._build_neural_net(
                theta[self.train_indices], x[self.train_indices])
            # If data on training device already move net as well.
            if (not self._device == "cpu"
                    and f"{x.device.type}:{x.device.index}" == self._device):
                self._neural_net.to(self._device)

            test_posterior_net_for_multi_d_x(self._neural_net, theta, x)
            self._x_shape = x_shape_from_simulation(x)

        # Move entire net to device for training.
        self._neural_net.to(self._device)

        if not resume_training:
            self.optimizer = optim.Adam(
                list(self._neural_net.parameters()),
                lr=learning_rate,
            )
            self.epoch, self._val_log_prob = 0, float("-Inf")

        while self.epoch <= max_num_epochs and not self._converged(
                self.epoch, stop_after_epochs):

            # Train for a single epoch.
            self._neural_net.train()
            train_log_prob_sum = 0
            epoch_start_time = time.time()
            for batch in train_loader:
                self.optimizer.zero_grad()
                # Get batches on current device.
                theta_batch, x_batch, masks_batch = (
                    batch[0].to(self._device),
                    batch[1].to(self._device),
                    batch[2].to(self._device),
                )

                batch_loss = torch.mean(
                    self._loss(
                        theta_batch,
                        x_batch,
                        masks_batch,
                        proposal,
                        calibration_kernel,
                    ))

                # NOTE(review): batch_loss is already a scalar mean, so .sum()
                # is a no-op; this accumulates per-batch mean losses, despite
                # the "sum" / "log_prob" naming.
                train_log_prob_sum += batch_loss.sum().item()

                batch_loss.backward()
                if clip_max_norm is not None:
                    clip_grad_norm_(
                        self._neural_net.parameters(),
                        max_norm=clip_max_norm,
                    )
                self.optimizer.step()

            self.epoch += 1

            # Normalize by the (approximate) number of training examples.
            train_log_prob_sum /= int(theta.shape[0] *
                                      (1.0 - validation_fraction))
            self._summary["train_log_probs"].append(train_log_prob_sum)

            # Calculate validation performance.
            self._neural_net.eval()
            log_prob_sum = 0

            with torch.no_grad():
                for batch in val_loader:
                    theta_batch, x_batch, masks_batch = (
                        batch[0].to(self._device),
                        batch[1].to(self._device),
                        batch[2].to(self._device),
                    )
                    # Take negative loss here to get validation log_prob.
                    batch_log_prob = -self._loss(
                        theta_batch,
                        x_batch,
                        masks_batch,
                        proposal,
                        calibration_kernel,
                    )
                    log_prob_sum += batch_log_prob.sum().item()

            # Take mean over all validation samples.
            # NOTE(review): assumes every validation batch is full; if the last
            # batch is smaller this slightly underestimates the mean -- confirm
            # drop_last behavior in get_dataloaders.
            self._val_log_prob = log_prob_sum / (len(val_loader) *
                                                 val_loader.batch_size)
            # Log validation log prob for every epoch.
            self._summary["validation_log_probs"].append(self._val_log_prob)
            self._summary["epoch_durations_sec"].append(time.time() -
                                                        epoch_start_time)

            self._maybe_show_progress(self._show_progress_bars, self.epoch)

        self._report_convergence_at_end(self.epoch, stop_after_epochs,
                                        max_num_epochs)

        # Update summary.
        self._summary["epochs"].append(self.epoch)
        self._summary["best_validation_log_probs"].append(
            self._best_val_log_prob)

        # Update tensorboard and summary dict.
        self._summarize(
            round_=self._round,
            x_o=None,
            theta_bank=theta,
            x_bank=x,
        )

        # Update description for progress bar.
        if show_train_summary:
            print(self._describe_round(self._round, self._summary))

        return deepcopy(self._neural_net)
예제 #4
0
# offline_archs = []

if (ARCH in offline_archs):
    # construct the transformer
    # NOTE(review): ARCH, ARGS, TRUNCATE_RATIO and offline_archs are defined
    # elsewhere in this file -- confirm before reusing this fragment.
    batch_size = 512
    z = torch.FloatTensor(np.load('{}.z.npy'.format(ARCH)))
    y = torch.LongTensor(np.load('{}.y.npy'.format(ARCH)))
    # to do some truncating
    batch_num = z.shape[0] // batch_size
    print(batch_num)
    ARGS.save_p += ".{:.1f}".format(TRUNCATE_RATIO)
    current_batch_num = int(batch_num * TRUNCATE_RATIO)
    print("Batch Size {}/{}".format(current_batch_num, batch_num))
    #

    xl_dataset = data_utils.TensorDataset(z, y)
    xl_dataloader = data_utils.DataLoader(xl_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)
    # Materialize the shuffled loader into a plain list of (z, y) batches.
    xl_dataloader = [(z, y) for z, y in xl_dataloader]
    print(len(xl_dataloader))


def explate(seq):
    """Join the items of *seq* with single spaces.

    For a string input this inserts a space between every character
    (e.g. ``"abc"`` -> ``"a b c"``); for any iterable of strings it is
    equivalent to the original append-then-trim loop, but linear-time.
    """
    return ' '.join(seq)


def extract_genomes(path):
예제 #5
0
import torch.utils.data as Data
import matplotlib.pyplot as plt

# Hyper-parameters.
LR=0.01
BATCH_SIZE=32
EPOCH=12


# 100 evenly spaced points in [-1, 1], shaped (100, 1) by unsqueeze.
x=torch.unsqueeze(torch.linspace(-1,1,100),dim=1)
# Quadratic target with additive Gaussian noise.
y=x.pow(2)+0.1*torch.normal(torch.zeros(*x.size()))

# plt.scatter(x.numpy(),y.numpy())
# plt.show()


# BUG FIX: the data_tensor=/target_tensor= keyword arguments were removed
# from TensorDataset in torch 0.4; pass the tensors positionally instead.
torch_dataset=Data.TensorDataset(x, y)
loader=Data.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
class Net(torch.nn.Module):
    """Small MLP regressor: 1 -> 20 -> 1 with a ReLU hidden layer."""

    def __init__(self):
        # BUG FIX: was `super(Net,self).__init()` (missing trailing
        # underscores), which raised AttributeError on construction.
        super(Net,self).__init__()
        self.hidden=torch.nn.Linear(1,20)
        self.predict=torch.nn.Linear(20,1)

    def forward(self, x):
        x=F.relu(self.hidden(x))
        x=self.predict(x)
        # The original snippet was truncated here; returning the prediction
        # is the standard completion of this forward pass.
        return x
예제 #6
0
# NOTE(review): tensor_x and le are defined earlier in the file (tensor_x is
# concatenated onto below; le is presumably a fitted sklearn LabelEncoder) --
# confirm against the full file.
target = []
folder = sorted(glob.glob('./dynamic_image/*'))
# %%
for path in folder:

    a = sorted(glob.glob(path + '/*.jpeg'))
    # Read all frames of this clip and stack to (N, C, H, W) via permute.
    frames = [cv2.imread(f) for f in a]
    tmp = torch.stack([torch.Tensor(i) for i in frames]).permute(0, 3, 1, 2)
    tensor_x = torch.cat((tensor_x, tmp), 0)
    # One label (the folder name) per frame.
    for i in range(len(a)):
        target.append(path.split("/")[-1])
# %%
le.fit(target)
tensor_y = torch.tensor(le.transform(target))
tensor_x = tensor_x / 255  # scale pixel values to [0, 1]
train_set = utils.TensorDataset(tensor_x, tensor_y)
# %%

# image,label = next(iter(train_set))
# image = image.permute(1,2,0)
# plt.imshow(image)

# %%
# Preview loader: one shuffled batch of 10 images arranged into a grid.
display_loader = torch.utils.data.DataLoader(train_set,
                                             batch_size=10,
                                             shuffle=True)
batch = next(iter(display_loader))
print('len:', len(batch))
images, labels = batch
grid = torchvision.utils.make_grid(images, nrow=10)
plt.figure(figsize=(30, 30))
예제 #7
0
def main():
    """Federated training entry point (PySyft).

    Builds three workers (virtual or websocket), splits the resampled data
    into train/test, trains the model federated across the workers for
    args.epochs epochs, and optionally saves the final state dict.
    """
    args = define_and_get_arguments()

    hook = sy.TorchHook(torch)

    # Branch here when using virtual (simulated) workers
    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
    # Branch here when using websocket workers
    else:
        a_kwargs_websocket = {"host": "192.168.0.57", "hook": hook}
        b_kwargs_websocket = {"host": "192.168.0.58", "hook": hook}
        c_kwargs_websocket = {"host": "192.168.0.59", "hook": hook}

        baseport = 10002
        alice = WebsocketClientWorker(id="alice",
                                      port=baseport,
                                      **a_kwargs_websocket)
        bob = WebsocketClientWorker(id="bob",
                                    port=baseport,
                                    **b_kwargs_websocket)
        charlie = WebsocketClientWorker(id="charlie",
                                        port=baseport,
                                        **c_kwargs_websocket)

    # Collect the worker objects into a list
    workers = [alice, bob, charlie]

    # Whether to use CUDA
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    # Set the random seed
    torch.manual_seed(args.seed)

    labels_resampled_factorized, obs_resampled_with_noise_2 = process_data()

    # percentage of test/valid set to use for testing and validation from the test_valid_idx (to be called test_size)
    test_size = 0.1

    # obtain training indices that will be used for validation
    num_train = len(obs_resampled_with_noise_2)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(test_size * num_train))
    train_idx, test_idx = indices[split:], indices[:split]

    # NOTE(review): fancy indexing with a list assumes these are numpy
    # arrays (or similar) -- confirm against process_data().
    print(type(obs_resampled_with_noise_2[train_idx]),
          type(labels_resampled_factorized[train_idx]))
    print(obs_resampled_with_noise_2[train_idx].shape,
          labels_resampled_factorized[train_idx].shape)
    print(labels_resampled_factorized[train_idx])
    federated_train_dataset = D.TensorDataset(
        torch.tensor(obs_resampled_with_noise_2[train_idx]),
        torch.tensor(labels_resampled_factorized[train_idx]))

    # Distribute the training dataset across the three workers.
    federated_train_loader = sy.FederatedDataLoader(
        federated_train_dataset.federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    test_dataset = D.TensorDataset(
        torch.tensor(obs_resampled_with_noise_2[test_idx]),
        torch.tensor(labels_resampled_factorized[test_idx]))

    test_loader = D.DataLoader(test_dataset,
                               shuffle=True,
                               batch_size=args.batch_size,
                               num_workers=0,
                               drop_last=True)

    model = Net(input_features=1, output_dim=5).to(device)
    criterion = nn.NLLLoss()

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model,
                      device,
                      federated_train_loader,
                      args.lr,
                      args.federate_after_n_batches,
                      criterion=criterion)
        test(model,
             test_loader,
             args.batch_size,
             criterion=criterion,
             train_on_gpu=use_cuda)

    if args.save_model:
        torch.save(model.state_dict(), "./Model/mnist_cnn.pt")
예제 #8
0

def read_idx3_ubyte(path):
    """Parse an IDX3 unsigned-byte file into a (count, rows, cols) tensor."""
    with open_maybe_compressed_file(path) as stream:
        raw = stream.read()
        # Magic number for a rank-3 ubyte tensor is 0x00000803 == 8 * 256 + 3.
        assert get_int(raw[:4]) == 8 * 256 + 3
        n_images = get_int(raw[4:8])
        n_rows = get_int(raw[8:12])
        n_cols = get_int(raw[12:16])
        # Pixel data starts right after the 16-byte header.
        pixels = np.frombuffer(raw, dtype=np.uint8, offset=16)
        return torch.from_numpy(pixels).view(n_images, n_rows, n_cols)


# Load QMNIST training images and integer label records; pixels are scaled
# to [0, 1] and only label column 0 is kept (presumably the digit class per
# the QMNIST idx2-int layout -- confirm).
qtrainimgs = read_idx3_ubyte("qmnist-train-images-idx3-ubyte")
qtrainlbls = read_idx2_int("qmnist-train-labels-idx2-int")
qtrain = data.TensorDataset(qtrainimgs.float() / 255, qtrainlbls[:, 0])

# let's go


def subset(dataset, digit):
    """Return a ``data.Subset`` of *dataset* keeping only samples labeled *digit*.

    Iterates the dataset once; each item is expected to be a
    ``(sample, label)`` pair.
    """
    # Idiomatic replacement for the original index loop with `temp += (i,)`.
    indices = [i for i, (_, lbl) in enumerate(dataset) if lbl == digit]
    return data.Subset(dataset, indices)


def collect_images(data):
    with torch.no_grad():
예제 #9
0
            linear_feature_columns + dnn_feature_columns)

    print("feature_index ", feature_index)

    target = ['label']
    X = train_model_input
    Y = train[target].values

    if isinstance(X, dict):
        X = [X[feature] for feature in feature_index]

    for i in range(len(X)):
        if len(X[i].shape) == 1:
            X[i] = np.expand_dims(X[i], axis=1)

    train_tensor_data = Data.TensorDataset(torch.from_numpy(np.concatenate(X, axis=-1)), torch.from_numpy(Y))

    torchX = torch.from_numpy(np.concatenate(X, axis=-1))
    print("torchX size: ", torchX.size())

    print("train_model_input: ", len(X))
    for feat in sparse_feature_columns:
        print("feat" , feat)
        print("feature_index[feat.name][0]" , feature_index[feat.name][0])
        test= torchX[:, feature_index[feat.name][0]:feature_index[feat.name][1]].long()
        #print("torchX range:", test)
        #print("embedding retrieved: ", embedding_dict[feat.embedding_name](test))

    sparse_embedding_list = [embedding_dict[feat.embedding_name]
            (torchX[:, feature_index[feat.name][0]:feature_index[feat.name][1]].long()) for feat in sparse_feature_columns]
    print("sparse_embedding_list:", len(sparse_embedding_list))
예제 #10
0
파일: anjie_nn.py 프로젝트: cosmos0x57/test

# NOTE(review): BATCH_SIZE is defined elsewhere in this file -- confirm.
# Load the pickled data matrix: columns 0-84 are features, column 85 the target.
narray = np.load("E:\\处理数据及问题\\安捷暖通-5m-模型-末端.npy", allow_pickle=True)
X = narray[:, 0:85]
Y = narray[:, 85][:, np.newaxis]
# Y = minmax(Y)
train_x, test_x, train_y, test_y = train_test_split(X,
                                                    Y,
                                                    test_size=0.15,
                                                    random_state=5)

# Convert the numpy splits to float32 tensors for training.
train_x = torch.from_numpy(train_x).float()
train_y = torch.from_numpy(train_y).float()
test_x = torch.from_numpy(test_x).float()
test_y = torch.from_numpy(test_y).float()
torch_train_dataset = Data.TensorDataset(train_x, train_y)
train_loader = Data.DataLoader(dataset=torch_train_dataset,
                               batch_size=BATCH_SIZE,
                               shuffle=True,
                               num_workers=2)
torch_test_dataset = Data.TensorDataset(test_x, test_y)
# NOTE(review): shuffling the test loader is unusual -- confirm intended.
test_loader = Data.DataLoader(dataset=torch_test_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=2)


def mape(y_true, y_pred):
    """Mean absolute percentage error between *y_true* and *y_pred*, in percent."""
    relative_errors = np.abs((y_pred - y_true) / y_true)
    return np.mean(relative_errors) * 100

예제 #11
0
device = 'cuda' if torch.cuda.is_available() else 'cpu'

#print(device)

# %% [code]
#Import datasets
dataset = pd.read_csv("/kaggle/input/Kannada-MNIST/train.csv")
validation_dataset = pd.read_csv("/kaggle/input/Kannada-MNIST/test.csv")

# %% [code]
#Separate labels and vectors an one hot encoding
y_dataset, X_dataset = dataset["label"], dataset.drop("label", axis=1)
y_dataset, X_dataset = torch.tensor(y_dataset.to_numpy(), dtype = torch.long), torch.tensor(X_dataset.to_numpy(), dtype = torch.float32)/255.

#changing to a torch.dataset
train = data_utils.TensorDataset(X_dataset, y_dataset)
# NOTE(review): the hard-coded split sizes assume exactly 60000 rows; the
# zero-length third split discards nothing -- confirm against the CSV.
train_dataset, test_dataset, trash = torch.utils.data.random_split(train, [55000, 5000,0])
# NOTE(review): params is a dict defined elsewhere in this file.
train_loader = data_utils.DataLoader(train_dataset, batch_size = params["batch_size"], shuffle = True)
test_loader = data_utils.DataLoader(test_dataset, batch_size = params["batch_size"], shuffle = True)

#for i in range(len(X_dataset[0])):
#    X_dataset[i,:] = (X_dataset[i,:] - statistics.mean(X_dataset[i,:]))/statistics.sdev(X_dataset[i,:])
# %% [code]
#Definition of the class to hold the net arquitecture
class FullyConnected(nn.Module):
  
  def __init__(self, num_inputs, hidden_size, num_classes):
    super().__init__()
    
    self.h1 = nn.Linear(num_inputs,hidden_size)
예제 #12
0
def obtain_fake_features(vf_path, f_num, nos_len):
    """Generate fake features and wrap them in a TensorDataset.

    Args:
        vf_path: path passed through to the fake-feature generator.
        f_num: number of features to generate.
        nos_len: noise length used by the generator.
    """
    generator = Generated_Fake_Features()
    fake_features, fake_labels = generator.generate_fake_features(
        vf_path, f_num, nos_len)
    return Data.TensorDataset(fake_features, fake_labels)
예제 #13
0
        wr_idx_num = int(right_num.item() * 1.5)

        wr_idx = random.sample(random_wrong_idx, wr_idx_num)
        w_f = fake_features[wr_idx]
        wrong_feature = w_f.detach()
        [wrong_features.append(i.squeeze()) for i in wrong_feature]

    wrong_features = np.array(wrong_features)
    # right_features = torch.from_numpy(np.array(right_features).astype(float))
    right_features = torch.from_numpy(np.array([item.cpu().numpy() for item in right_features])).cuda()
    wrong_features = torch.from_numpy(np.array([item.cpu().numpy() for item in wrong_features])).cuda()
    original_labels = torch.from_numpy(np.array([item.cpu().numpy() for item in original_labels])).cuda()
    right_labels = torch.ones(right_features.shape[0])
    wrong_labels = torch.zeros(wrong_features.shape[0])

    train_dataset_g_to_f = Data.TensorDataset(right_features, original_labels)

    new_vs_features = torch.cat([right_features, wrong_features], dim=0)
    new_vs_labels = torch.cat([right_labels, wrong_labels], dim=0)

    train_dataset_g_to_c = Data.TensorDataset(new_vs_features, new_vs_labels)

    dataset_loader_g_to_c = Data.DataLoader(train_dataset_g_to_c, 40, shuffle=True, drop_last=True)
    dataset_loader_g_to_f = Data.DataLoader(train_dataset_g_to_f, 20, shuffle=True, drop_last=True)
    ############################################# G-->C ####################################################
    loss_c = 0
    loss_f = 0
    for i, batch in enumerate(dataset_loader_g_to_c, 0):
        g_net.zero_grad()
        c_net.zero_grad()
        init_model_params(g_net, True)
예제 #14
0
    def compute(self):
        """Run one training pass per data partition (cross-validation style).

        For each partition: build the train/val split, train a fresh copy of
        self.model with SGD + SmoothL1Loss, periodically record loss and
        validation precision/recall, and return the accumulated histories.
        """
        for i in range(self.partition):
            start = time.time()
            temptrain_X, temptrain_Y, tempval_X, tempval_Y = self.create_data(i)
            self.train_X, self.train_Y = factorize(temptrain_X, temptrain_Y, self.augment_data_flag, self.batch_size, self.batch_length)
            self.val_X, self.val_Y = factorize(tempval_X, tempval_Y, self.augment_data_flag, self.batch_size, self.batch_length)
            train_set = data_utils.TensorDataset(self.train_X, self.train_Y)
            train_loader=data_utils.DataLoader(dataset=train_set, batch_size=BATCH_SIZE, drop_last=True, shuffle=True) 

            
            print(i, "phase 1 completed.")
            
            # Fresh model copy per partition so partitions don't share weights.
            cur_model = deepcopy(self.model).to(device)
            
            optimizer = optim.SGD(cur_model.parameters(), lr = self.lr, weight_decay = 1e-5)
    
            criterion = nn.SmoothL1Loss()

            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 1, gamma = self.gamma)

            print_loss_total = 0
            plot_loss_total = 0
            
            
            for j in range(self.epochs):
                for num, (train_X, train_Y)in enumerate(train_loader):
                    input_tensor = train_X.to(device).float()
                    target_tensor = train_Y.to(device).float()
                    #print(input_tensor.shape)
                    loss = train(input_tensor, target_tensor, cur_model, decoder_optimizer=optimizer, criterion= criterion)
                    #print(num, loss)
                    print_loss_total += loss
                    plot_loss_total += loss
                    
                    # Every plot_every batches: record smoothed loss and validate.
                    if (num+1)%self.plot_every == 0:
                        plot_loss_avg = plot_loss_total / self.plot_every
                        plot_loss_total = 0
                        self.loss_history.append(plot_loss_avg)
                        
                        acc, one_acc, score = validate(cur_model, self.val_X, self.val_Y)
                        self.precision_history.append(acc[:-1])
                        self.recall_history.append(one_acc[:-1])
                        print("validation accuracy:", acc)
                        print("validation prediction accuracy:", one_acc)

                    # Every print_every batches: report smoothed loss + progress.
                    if (num+1) % self.print_every == 0:
                        
                        print_loss_avg = print_loss_total / self.print_every
                        print_loss_total = 0
                        print("partition%i epoch %i"%(i,j))
                        p = self.timeSince(start, (num+j*len(train_loader)) / (self.epochs * len(train_loader)))
                        print('%s (%d %d%%) %.4f' % (p, num + 1, (num + 1) / (self.epochs * len(train_loader)) * self.print_every,
                                                     print_loss_avg))
                        """if(score > self.best_acc):
                        #    torch.save(cur_model.state_dict(), '/home/yiqin/2018summer_project/saved_model/Bi-LSTM-CNN_best(cv).pt')
                            self.best_acc = score
                        print("best_score:", self.best_acc)"""

                scheduler.step()
                #torch.save(cur_model.state_dict(), '/home/yiqin/2018summer_project/saved_model/Bi-LSTM-CNN(cv){}-{}.pt'.format(i,j))
                
        return self.loss_history, self.precision_history, self.recall_history
예제 #15
0
    def _train(
        self,
        num_atoms: int,
        training_batch_size: int,
        learning_rate: float,
        validation_fraction: float,
        stop_after_epochs: int,
        max_num_epochs: int,
        clip_max_norm: Optional[float],
        exclude_invalid_x: bool,
        discard_prior_samples: bool,
    ) -> None:
        r"""
        Train the neural classifier.

        The classifier weights are updated by maximizing a Bernoulli likelihood
        that distinguishes jointly distributed $(\theta, x)$ pairs from randomly
        paired ones. Performance on a held-out validation split serves as the
        early-stopping criterion.
        """
        # Starting index into the data bank (1 = skip the round-0 prior samples).
        first_idx = int(discard_prior_samples and self._round > 0)
        theta, x, _ = self._get_from_data_bank(first_idx, exclude_invalid_x)

        num_examples = len(theta)

        # Randomly partition the (theta, x) pairs into train/validation splits.
        shuffled_idx = torch.randperm(num_examples)
        num_train = int((1 - validation_fraction) * num_examples)
        num_val = num_examples - num_train
        train_idx = shuffled_idx[:num_train]
        val_idx = shuffled_idx[num_train:]

        # The effective batch size may not exceed the validation-split size.
        batch_size = min(training_batch_size, num_val)

        num_atoms = clamp_and_warn(
            "num_atoms", num_atoms, min_val=2, max_val=batch_size
        )

        # A single dataset serves both loaders; the samplers pick the splits.
        dataset = data.TensorDataset(theta, x)
        train_loader = data.DataLoader(
            dataset,
            batch_size=batch_size,
            drop_last=True,
            sampler=SubsetRandomSampler(train_idx),
        )
        val_loader = data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            drop_last=False,
            sampler=SubsetRandomSampler(val_idx),
        )

        optimizer = optim.Adam(
            list(self._posterior.net.parameters()), lr=learning_rate,
        )

        epoch = 0
        self._val_log_prob = float("-Inf")

        while epoch <= max_num_epochs and not self._converged(epoch, stop_after_epochs):

            # One full pass over the training split.
            self._posterior.net.train()
            for theta_batch, x_batch in train_loader:
                optimizer.zero_grad()
                theta_batch = theta_batch.to(self._device)
                x_batch = x_batch.to(self._device)
                loss = self._loss(theta_batch, x_batch, num_atoms)
                loss.backward()
                if clip_max_norm is not None:
                    clip_grad_norm_(
                        self._posterior.net.parameters(), max_norm=clip_max_norm,
                    )
                optimizer.step()

            epoch += 1

            # Validation pass: negating the summed loss yields a log prob.
            self._posterior.net.eval()
            log_prob_sum = 0
            with torch.no_grad():
                for theta_batch, x_batch in val_loader:
                    theta_batch = theta_batch.to(self._device)
                    x_batch = x_batch.to(self._device)
                    batch_loss = self._loss(theta_batch, x_batch, num_atoms)
                    log_prob_sum -= batch_loss.sum().item()
                self._val_log_prob = log_prob_sum / num_val
                # Log validation log prob for every epoch.
                self._summary["validation_log_probs"].append(self._val_log_prob)

            self._maybe_show_progress(self._show_progress_bars, epoch)

        self._report_convergence_at_end(epoch, stop_after_epochs, max_num_epochs)

        # Record final statistics for this round.
        self._summary["epochs"].append(epoch)
        self._summary["best_validation_log_probs"].append(self._best_val_log_prob)
예제 #16
0
# Load low-resolution Hi-C samples and build an (unshuffled) DataLoader
# for inference with a pretrained enhancement model.
low_resolution_samples, index = utils.divide(input_file)

# Clip contact counts at HiC_max_value to bound the dynamic range.
low_resolution_samples = np.minimum(HiC_max_value, low_resolution_samples)

# One batch holds the whole sample set.
batch_size = low_resolution_samples.shape[0]

# Reshape the high-quality Hi-C sample as the target value of the training.
sample_size = low_resolution_samples.shape[-1]
padding = conv2d1_filters_size + conv2d2_filters_size + conv2d3_filters_size - 3
# NOTE: this block was originally Python 2 ("print x", integer "/").
# Floor division preserves the Python 2 integer-division semantics here
# (the filter sizes are assumed to be ints -- TODO confirm).
half_padding = padding // 2
output_length = sample_size - padding

print(low_resolution_samples.shape)

# Dummy all-zero targets: labels are unused at inference time.
lowres_set = data.TensorDataset(
    torch.from_numpy(low_resolution_samples),
    torch.from_numpy(np.zeros(low_resolution_samples.shape[0])))
lowres_loader = torch.utils.data.DataLoader(lowres_set,
                                            batch_size=batch_size,
                                            shuffle=False)

# Reuse the same loader for the "high-res" side; targets are ignored.
hires_loader = lowres_loader

# Restore pretrained weights (40x40 input patches, 28x28 output -- per Net).
model = model.Net(40, 28)
model.load_state_dict(torch.load('../model/pytorch_model_12000'))
if use_gpu:
    model = model.cuda()

_loss = nn.MSELoss()

running_loss = 0.0
def load_array(data_arrays, batch_size, is_train=True):
    """Wrap tensors into a mini-batch DataLoader.

    Args:
        data_arrays: tensors sharing the same first dimension.
        batch_size: number of examples per batch.
        is_train: shuffle the examples every epoch when True.

    Returns:
        A ``DataLoader`` over ``TensorDataset(*data_arrays)``.
    """
    wrapped = data.TensorDataset(*data_arrays)
    return data.DataLoader(wrapped, batch_size, shuffle=is_train)
예제 #18
0
def sac(
    env_name,
    total_steps,
    model,
    env_steps=0,
    min_steps_per_update=1,
    iters_per_update=100,
    replay_batch_size=64,
    seed=0,
    gamma=0.95,
    polyak=0.995,
    alpha=0.2,
    sgd_batch_size=64,
    sgd_lr=1e-3,
    exploration_steps=100,
    replay_buf_size=100000,
    use_gpu=False,
    reward_stop=None,
    env_config=None,
):
    """
    Implements soft actor critic

    Args:
        env_name: name of the openAI gym environment to solve
        total_steps: number of timesteps to run for
        model: model from seagul.rl.models. Contains policy, value fn, q1_fn, q2_fn
        min_steps_per_update: minimum number of steps to take before running updates, will finish episodes before updating
        env_steps: number of steps the environment takes before finishing, if the environment emits a done signal before this we consider it a failure.
        iters_per_update: how many update steps to make every time we update
        replay_batch_size: how big a batch to pull from the replay buffer for each update
        seed: random seed for all rngs
        gamma: discount applied to future rewards, usually close to 1
        polyak: term determining how fast the target network is copied from the value function
        alpha: weighting term for the entropy. 0 corresponds to no penalty for deterministic policy
        sgd_batch_size: minibatch size for policy updates
        sgd_lr: initial learning rate for policy optimizer
        exploration_steps: initial number of random actions to take, aids exploration
        replay_buf_size: how big of a replay buffer to use
        use_gpu: determines if we try to use a GPU or not
        reward_stop: reward value to bail at
        env_config: dictionary containing kwargs to pass to the environment
            (None, the default, means no extra kwargs)

    Returns:
        model: trained model
        avg_reward_hist: list with the average reward per episode at each epoch
        var_dict: dictionary with all locals, for logging/debugging purposes

    Example:
        from seagul.rl.algos.sac import sac
        import torch.nn as nn
        from seagul.nn import MLP
        from seagul.rl.models import SACModel

        input_size = 3
        output_size = 1
        layer_size = 64
        num_layers = 2
        activation = nn.ReLU

        policy = MLP(input_size, output_size*2, num_layers, layer_size, activation)
        value_fn = MLP(input_size, 1, num_layers, layer_size, activation)
        q1_fn = MLP(input_size + output_size, 1, num_layers, layer_size, activation)
        q2_fn = MLP(input_size + output_size, 1, num_layers, layer_size, activation)
        model = SACModel(policy, value_fn, q1_fn, q2_fn, 1)

        model, rews, var_dict = sac("Pendulum-v0", 10000, model)
    """
    # Guard against the shared-mutable-default pitfall: the original default
    # of `env_config={}` would be shared across all calls of this function.
    if env_config is None:
        env_config = {}

    torch.set_num_threads(1)

    env = gym.make(env_name, **env_config)
    if isinstance(env.action_space, gym.spaces.Box):
        act_size = env.action_space.shape[0]
        act_dtype = env.action_space.sample().dtype
    else:
        raise NotImplementedError("trying to use unsupported action space",
                                  env.action_space)

    obs_size = env.observation_space.shape[0]

    # Random policy fills the replay buffer during the exploration phase.
    random_model = RandModel(model.act_limit, act_size)
    replay_buf = ReplayBuffer(obs_size, act_size, replay_buf_size)
    # Deep copy (via dill round-trip) of the value fn becomes the polyak target.
    target_value_fn = dill.loads(dill.dumps(model.value_fn))

    pol_opt = torch.optim.Adam(model.policy.parameters(), lr=sgd_lr)
    val_opt = torch.optim.Adam(model.value_fn.parameters(), lr=sgd_lr)
    q1_opt = torch.optim.Adam(model.q1_fn.parameters(), lr=sgd_lr)
    q2_opt = torch.optim.Adam(model.q2_fn.parameters(), lr=sgd_lr)

    # seed all our RNGs
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    # set defaults, and decide if we are using a GPU or not
    use_cuda = torch.cuda.is_available() and use_gpu
    device = torch.device("cuda:0" if use_cuda else "cpu")

    raw_rew_hist = []
    val_loss_hist = []
    pol_loss_hist = []
    q1_loss_hist = []
    q2_loss_hist = []

    progress_bar = tqdm.tqdm(total=total_steps)
    cur_total_steps = 0
    progress_bar.update(0)
    early_stop = False

    # Initial exploration: act randomly to seed the replay buffer.
    while cur_total_steps < exploration_steps:
        ep_obs1, ep_obs2, ep_acts, ep_rews, ep_done = do_rollout(
            env, random_model, env_steps)
        replay_buf.store(ep_obs1, ep_obs2, ep_acts, ep_rews, ep_done)

        ep_steps = ep_rews.shape[0]
        cur_total_steps += ep_steps

    progress_bar.update(cur_total_steps)

    while cur_total_steps < total_steps:
        cur_batch_steps = 0

        # Bail out if we have met our reward threshold (two episodes in a row).
        if len(raw_rew_hist) > 2 and reward_stop:
            if raw_rew_hist[-1] >= reward_stop and raw_rew_hist[
                    -2] >= reward_stop:
                early_stop = True
                break

        # collect data with the current policy
        # ========================================================================
        while cur_batch_steps < min_steps_per_update:
            ep_obs1, ep_obs2, ep_acts, ep_rews, ep_done = do_rollout(
                env, model, env_steps)
            replay_buf.store(ep_obs1, ep_obs2, ep_acts, ep_rews, ep_done)

            ep_steps = ep_rews.shape[0]
            cur_batch_steps += ep_steps
            cur_total_steps += ep_steps

            raw_rew_hist.append(torch.sum(ep_rews))

        progress_bar.update(cur_batch_steps)

        for _ in range(min(int(ep_steps), iters_per_update)):
            # compute targets for Q and V
            # ========================================================================
            replay_obs1, replay_obs2, replay_acts, replay_rews, replay_done = replay_buf.sample_batch(
                replay_batch_size)

            # Bellman target for Q; (1 - done) masks out terminal bootstraps.
            q_targ = replay_rews + gamma * (
                1 - replay_done) * target_value_fn(replay_obs2)
            q_targ = q_targ.detach()

            noise = torch.randn(replay_batch_size, act_size)
            sample_acts, sample_logp = model.select_action(replay_obs1, noise)

            # Clipped double-Q: take the minimum of both Q estimates.
            q_in = torch.cat((replay_obs1, sample_acts), dim=1)
            q_preds = torch.cat((model.q1_fn(q_in), model.q2_fn(q_in)), dim=1)
            q_min, q_min_idx = torch.min(q_preds, dim=1)
            q_min = q_min.reshape(-1, 1)

            # Entropy-regularized value target.
            v_targ = q_min - alpha * sample_logp
            v_targ = v_targ.detach()

            # For training, transfer model to GPU
            model.policy = model.policy.to(device)
            model.value_fn = model.value_fn.to(device)
            model.q1_fn = model.q1_fn.to(device)
            model.q2_fn = model.q2_fn.to(device)

            # q_fn update
            # ========================================================================
            training_data = data.TensorDataset(replay_obs1, replay_acts,
                                               q_targ)
            training_generator = data.DataLoader(training_data,
                                                 batch_size=sgd_batch_size,
                                                 shuffle=True,
                                                 num_workers=0,
                                                 pin_memory=False)

            for local_obs, local_acts, local_qtarg in training_generator:
                # Transfer to GPU (if GPU is enabled, else this does nothing)
                local_obs, local_acts, local_qtarg = (
                    local_obs.to(device),
                    local_acts.to(device),
                    local_qtarg.to(device),
                )

                q_in = torch.cat((local_obs, local_acts), dim=1)
                q1_preds = model.q1_fn(q_in)
                q2_preds = model.q2_fn(q_in)
                q1_loss = torch.pow(q1_preds - local_qtarg, 2).mean()
                q2_loss = torch.pow(q2_preds - local_qtarg, 2).mean()
                q_loss = q1_loss + q2_loss

                q1_opt.zero_grad()
                q2_opt.zero_grad()
                q_loss.backward()
                q1_opt.step()
                q2_opt.step()

            # val_fn update
            # ========================================================================
            training_data = data.TensorDataset(replay_obs1, v_targ)
            training_generator = data.DataLoader(training_data,
                                                 batch_size=sgd_batch_size,
                                                 shuffle=True,
                                                 num_workers=0,
                                                 pin_memory=False)

            for local_obs, local_vtarg in training_generator:
                # Transfer to GPU (if GPU is enabled, else this does nothing)
                local_obs, local_vtarg = (local_obs.to(device),
                                          local_vtarg.to(device))

                # predict and calculate loss for the batch
                val_preds = model.value_fn(local_obs)
                val_loss = torch.sum(torch.pow(val_preds - local_vtarg,
                                               2)) / replay_batch_size

                # do the normal pytorch update
                val_opt.zero_grad()
                val_loss.backward()
                val_opt.step()

            # policy_fn update
            # ========================================================================
            training_data = data.TensorDataset(replay_obs1)
            training_generator = data.DataLoader(training_data,
                                                 batch_size=sgd_batch_size,
                                                 shuffle=True,
                                                 num_workers=0,
                                                 pin_memory=False)

            for local_obs in training_generator:
                # Transfer to GPU (if GPU is enabled, else this does nothing)
                local_obs = local_obs[0].to(device)

                noise = torch.randn(local_obs.shape[0], act_size).to(device)
                local_acts, local_logp = model.select_action(local_obs, noise)

                # Maximize Q minus entropy penalty (minimize the negation).
                q_in = torch.cat((local_obs, local_acts), dim=1)
                pol_loss = torch.sum(alpha * local_logp -
                                     model.q1_fn(q_in)) / replay_batch_size

                # do the normal pytorch update
                pol_opt.zero_grad()
                pol_loss.backward()
                pol_opt.step()

            # Update target value fn with polyak average
            # ========================================================================
            val_loss_hist.append(val_loss.item())
            pol_loss_hist.append(pol_loss.item())
            q1_loss_hist.append(q1_loss.item())
            q2_loss_hist.append(q2_loss.item())

            # Transfer back to CPU, which is faster for rollouts
            model.policy = model.policy.to('cpu')
            model.value_fn = model.value_fn.to('cpu')
            model.q1_fn = model.q1_fn.to('cpu')
            model.q2_fn = model.q2_fn.to('cpu')

            val_sd = model.value_fn.state_dict()
            tar_sd = target_value_fn.state_dict()
            for layer in tar_sd:
                tar_sd[layer] = polyak * tar_sd[layer] + (
                    1 - polyak) * val_sd[layer]

            target_value_fn.load_state_dict(tar_sd)

    # Release the tqdm resources before returning.
    progress_bar.close()
    return model, raw_rew_hist, locals()
예제 #19
0
def train(h5file, h5key, pklfile, validationh5, trainedlossplot, train_target):
        """Train `Net` on features from an HDF5 table to regress a shower angle.

        Args:
            h5file: path to the input HDF5 file. Rows 0..100000 become the
                test set; the remaining rows are streamed in chunks for training.
            h5key: key of the table inside the HDF5 file.
            pklfile: path where the final model state_dict is saved.
            validationh5: path where test-set predictions are written.
            trainedlossplot: path for the saved loss/validation figure.
            train_target: 'phi' or 'theta' -- which truth column to regress.

        NOTE(review): relies on module-level globals (Net, Logger, LR, EPOCH,
        BATCH_SIZE, BATCH_SIZE_test, Dir_training, Dir_pkl, train_lossh5) and
        assumes CUDA is available -- verify before reuse.
        """

        # ******* input dataset from h5, then divide it into train dataset and test dataset(16:1)

        # 75 input features, single scalar regression output.
        net = Net(n_feature=75,  n_output=1)
        # pklfile6 = 'train6/NN_train_params_3975284924_2.pkl'
        # net.load_state_dict(torch.load(pklfile6))
        net.cuda()
        net = net.double()
        print(net)
        # Recreate the log directory from scratch for this run.
        logdir = Dir_training + 'NN_logs_' + h5key
        if os.path.isdir(logdir):
            shutil.rmtree(logdir)
        logger = Logger(logdir)


        # optimizer = torch.optim.SGD(net.parameters(), lr=LR, weight_decay=0.01,momentum=0.9)
        # optimizer = torch.optim.SGD(net.parameters(), lr=LR, momentum=0.5)
        # optimizer = torch.optim.Adagrad(net.parameters(), lr=LR, lr_decay=0.01)
        optimizer = torch.optim.Adam(net.parameters(), lr=LR)
        # optimizer = torch.optim.RMSprop(net.parameters(), lr=LR, weight_decay=5e-2)
        loss_func = nn.MSELoss()
        print("Let's use", torch.cuda.device_count(), "GPUs!")

        # Interactive plotting: the figure is refreshed during training.
        plt.ion()
        plt.figure(figsize=(10,4))
        loss_list_train = []
        loss_list_test  = []
        step_list = []
        # par_np = net.parameters()

        Step = 0
        lri = LR

        # ****** test dataset: first 100k rows of the table
        mydf_test = pd.read_hdf(h5file, h5key, start=0, stop= 100000)
        # Feature columns start at column 4; NaNs are zero-filled.
        test_data_np = mydf_test.iloc[:,4:].replace(np.nan, 0.0).values
        test_data_tensor = torch.from_numpy(test_data_np).double()

        # mc* columns are the truth labels; phi/theta are the reconstructed
        # values used only for comparison plots.
        if train_target == 'phi':
            test_labels_np = mydf_test.mcPhi.values.reshape((mydf_test.shape[0],1))
            test_rec_np = mydf_test.phi.values.reshape((mydf_test.shape[0],1))
        elif train_target == 'theta':
            test_labels_np = mydf_test.mcTheta.values.reshape((mydf_test.shape[0],1))
            test_rec_np = mydf_test.theta.values.reshape((mydf_test.shape[0],1))
        else:
            print("Wrong train target!")

        test_labels_tensor = torch.from_numpy(test_labels_np).double()
        test_rec_tensor = torch.from_numpy(test_rec_np).double()
        test_dataset   = Data.TensorDataset(test_data_tensor, test_labels_tensor)
        test_loader   = Data.DataLoader(test_dataset, batch_size=BATCH_SIZE_test )

        # res = net(test_data_tensor.cuda())
        # res = Variable(torch.rand(75,640))
        # writer = SummaryWriter(logdir)
        # writer.add_graph(net, res.cuda().double())
        # writer.close()

        for epoch in range(EPOCH):
           print('EPOCH:  ', epoch)
           # Stream the training rows (after the 100k test rows) in chunks.
           reader = pd.read_hdf(h5file, h5key, chunksize=BATCH_SIZE*2, start = 100000)
           for mydf_readd5 in  reader:

              mydf_train = mydf_readd5
              # mydf_train = mydf_readd5.iloc[: int(mydf_readd5.shape[0]*15/16)]
              # mydf_test  = mydf_readd5.iloc[int(mydf_readd5.shape[0]*15/16):]
              # print(mydf_train.iloc[:,54:].head())
              # print(mydf_test.iloc[:,54:].head())
              # print(mydf_train.shape)


              # ****** train dataset
              train_data_np = mydf_train.iloc[:,4:].replace(np.nan, 0.0).values
              train_data_tensor = torch.from_numpy(train_data_np).double()
              if train_target == 'phi':
                  train_labels_np = mydf_train.mcPhi.values.reshape((mydf_train.shape[0],1))
              elif train_target == 'theta':
                  train_labels_np = mydf_train.mcTheta.values.reshape((mydf_train.shape[0],1))
              else:
                  print("Wrong train target!")

              train_labels_tensor = torch.from_numpy(train_labels_np).double()
              train_dataset   = Data.TensorDataset(train_data_tensor, train_labels_tensor)
              train_loader = Data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)



              for step, data in enumerate(train_loader):
                  # b_x, b_y = data
                  b_X, b_Y = data
                  b_x = b_X.cuda()
                  b_y = b_Y.cuda()


                  # ****** L2 regularization
                  # NOTE(review): l2_reg is computed but the line adding it to
                  # the loss is commented out below, so it has no effect.
                  reg_lambda = torch.tensor(0.2)
                  l2_reg = torch.tensor(0.)
                  for param in net.parameters():
                      l2_reg += param.cpu().float().norm(2)

                  prediction = net(b_x).cuda()
                  loss = loss_func(prediction, b_y)
                  # loss +=  (reg_lambda*l2_reg).cuda().double()
                  optimizer.zero_grad()
                  loss.backward()
                  optimizer.step()
                  Step+=1

                  # Every 100 optimizer steps: evaluate on the full test set,
                  # refresh the live plots and push TensorBoard summaries.
                  if (Step+1) % 100 == 0:
                      test_output = net(test_data_tensor.cuda())
                      test_pred_y = test_output.cpu().data.numpy()
                      # test_pred_y = test_output.data.numpy()
                      accuracy_test = sum(test_pred_y - test_labels_np)
                      loss_test = loss_func(test_output, test_labels_tensor.cuda())
                      # loss_rec = loss_func(test_rec_tensor.cuda(), test_labels_tensor.cuda())
                      print('Epoch:', epoch, '|step:', Step,
                            '|train loss:%.8f'%loss.item(), '|test loss:%.8f'%loss_test.item())
                      step_list.append(Step)
                      loss_list_train.append(loss.item())
                      loss_list_test.append(loss_test.item())

                      # Left panel: train/test loss curves.
                      plt.subplot(131)
                      plt.cla()
                      plt.plot(step_list, loss_list_train, 'b-', lw=1, label='train')
                      plt.plot(step_list, loss_list_test, 'r-', lw=3, label='test')
                      plt.xlabel('step')
                      plt.ylabel('loss')
                      plt.text(10, 0.027, 'Loss_train=%.8f' % loss.item(), fontdict={'size': 10, 'color':  'blue'})
                      plt.text(10, 0.025, 'Loss_test=%.8f' % loss_test.item(), fontdict={'size': 10, 'color':  'red'})
                      # plt.text(10, 0.023, 'Loss_rec=%.8f' % loss_rec.data[0], fontdict={'size': 10, 'color':  'red'})
                      legend = plt.legend(loc="best")#(loc="best")
                      frame = legend.get_frame()
                      frame.set_facecolor('none') # make the legend background transparent

                      if train_target == 'phi':
                          Range = [-3.2, 3.2]
                      elif train_target == 'theta':
                          Range = [0.4, 2.4]

                      # Right panel: test-set truth vs prediction vs reco.
                      plt.subplot(133)
                      plt.cla()
                      plt.hist(test_labels_np, bins=200,range=Range, color='red',alpha=0.7, fill=False,histtype='step', label='test_truth')
                      plt.hist(test_pred_y,    bins=200,range=Range, color='blue',alpha=0.7, fill=False,histtype='step', label='test_pre')
                      plt.hist(test_rec_np,    bins=200,range=Range, color='green',alpha=0.7, fill=False,histtype='step', label='test_rec')
                      plt.xlabel(r'$' + '\\'+ train_target + '$')
                      legend = plt.legend(loc="best")#(loc="best")
                      frame = legend.get_frame()
                      frame.set_facecolor('none') # make the legend background transparent

                      # Middle panel: current batch truth vs prediction.
                      plt.subplot(132)
                      plt.cla()
                      plt.hist(b_y.cpu().data.numpy(),        bins=200,range=Range, color='red',alpha=0.7, fill=False,histtype='step', label='train_truth')
                      plt.hist(prediction.cpu().data.numpy(), bins=200,range=Range, color='blue',alpha=0.7, fill=False,histtype='step', label='train_pre')
                      plt.xlabel(r'$' + '\\'+ train_target + '$')
                      legend = plt.legend(loc="best")#(loc="best")
                      frame = legend.get_frame()
                      frame.set_facecolor('none') # make the legend background transparent
                      plt.pause(0.1)

                      # ================================================================== #
                      #                        Tensorboard Logging                         #
                      # ================================================================== #

                      # 1. Log scalar values (scalar summary)
                      info = { 'loss': loss.item(),  'loss_test': loss_test.item(), 'accuracy': accuracy_test.item() }

                      for tag, value in info.items():
                          logger.scalar_summary(tag, value, Step+1)

                      # 2. Log values and gradients of the parameters (histogram summary)
                      for tag, value in net.named_parameters():
                          tag = tag.replace('.', '/')
                          logger.histo_summary(tag, value.data.cpu().numpy(), Step+1)
                          logger.histo_summary(tag+'/grad', value.grad.data.cpu().numpy(), Step+1)

                      # 3. Log training images (image summary)
                      info = { 'images': b_x.view(-1, 5, 5)[:10].cpu().numpy() }

                      for tag, images in info.items():
                          logger.image_summary(tag, images, Step+1)

           # Manual ~0.5%-per-epoch learning-rate decay.
           lri = lri/(1 + 0.005)
           print("lri:  ",lri)
           for param_group in optimizer.param_groups:
               param_group['lr'] = lri
           # Checkpoint the weights every 50 epochs.
           if (epoch+1) % 50 == 0:
              pklfile_epoch = Dir_pkl + 'NN_train_params_epoch' + str(epoch) + '.pkl'
              torch.save(net.state_dict(), pklfile_epoch)


        plt.ioff()
        plt.savefig(trainedlossplot,dpi=300)
        plt.show()

        # Persist the full loss history for later analysis.
        loss_df = pd.DataFrame.from_dict({'step' : step_list, 'train' : loss_list_train, 'test' : loss_list_test})
        loss_df.to_hdf(train_lossh5, key=h5key, mode='w')

        # Quick sanity print on the first 10 test rows.
        test_output = net(test_data_tensor[:10].cuda())
        test_pred_y = test_output.cpu().data.numpy()
        # test_pred_y = test_output.data.numpy()
        print('prediction number:  ', test_pred_y )
        print( 'real number:  ', test_labels_np[:10])

        # ****** The model after train
        for name, param in net.state_dict().items():
            print(name, param.size())


        # ****** save the whole model
        # torch.save(model_object, 'model.pkl')
        # only save the parameters ((recommended))
        torch.save(net.state_dict(), pklfile)

        # Batched prediction over the full test set.
        test_pred_y = np.empty((0,1))
        for step, data in enumerate(test_loader):
            t_X,  t_Y = data
            t_x = t_X.cuda()
            t_y = t_Y.cuda()
            test_output = net(t_x).cuda()
            test_pred_y = np.vstack([test_pred_y, test_output.cpu().data.numpy()])

        # test_pred_y = np.delete(test_pred_y, 0, 0)
        print("shapes:  ", test_pred_y.shape)
        # Write predictions alongside truth and reco columns.
        pred_df = pd.DataFrame(mydf_test[['mcPhi','phi', 'mcTheta', 'theta']])
        print("shapes:  ", test_pred_y.shape, pred_df.shape)
        if train_target == 'phi':
           pred_df['prePhi'] = test_pred_y
        elif train_target == 'theta':
           pred_df['preTheta'] = test_pred_y
        pred_df.to_hdf(validationh5, key=h5key, mode='w')
예제 #20
0
def DataloadtoGAN(path,
                  mark=None,
                  label=False,
                  single_dataset=False,
                  hacking=False,
                  select=''):
    """
    Read CAN-bus pickle datasets into a DataLoader for GAN training.

    :param path: dataset directory url
    :param mark: split to load: 'validate', 'test' or 'train'
    :param label: whether to attach a 0/1 label tensor to the dataset
    :param single_dataset: restrict to the whole-normal
        'Attack_free_dataset2' file; not suitable for the normal-status
        subset of the hacking dataset
    :param hacking: True -> normal-status data from the hacking dataset,
        False -> whole normal dataset
    :param select: optional dataset-name filter (e.g. 'Normal')
    :return: torch.utils.data.DataLoader over (1, 64, 21)-shaped blocks
        (batch size taken from the module-level BATCH_SIZE)
    """
    # `mark` is required to pick the split boundaries below.
    if mark is None:
        print('mark is None, please checks')
        return
    if hacking:
        # Collect the relevant pickle files under each dataset directory.
        files = []
        for d in os.listdir(path):
            if select.title() == d.title():
                # Explicit selection: validate uses pure-normal data,
                # other marks use pure-attack data.
                if mark == 'validate':
                    f = os.path.join(path, d, 'pure_normal.pkl')
                else:
                    f = os.path.join(path, d, 'pure_attack.pkl')
                files.append(f)
                break
            elif select == '':
                pass
            else:
                continue

            if 'normal.pkl' in d:
                files.append(os.path.join(path, d))
                continue
            elif '.' in d:
                # Skip plain files that are not normal.pkl.
                continue
            else:
                # Recurse one level into dataset subdirectories.
                for f in os.listdir(os.path.join(path, d)):
                    if 'normal.pkl' in f:
                        files.append(os.path.join(path, d, f))
    else:
        files = [os.path.join(path, f) for f in os.listdir(path) if 'pkl' in f]

    fl = []
    if single_dataset:
        files = [i for i in files if 'Attack_free_dataset2' in i]
    # Placeholder buffers; overwritten on the first file (i == 0).
    data2 = np.empty((64, 21))
    atta2 = np.empty((64, 21))

    # read dataset
    for i, f in enumerate(files):
        print('address:%s' % f)
        atta = np.empty((64, 21))

        data1 = pd.read_pickle(f, compression='zip')
        data = data1.values.astype(np.float64)
        rows = data.shape[0]
        start = 0
        end = rows
        row = int(rows // 64)
        row1 = row
        file = os.path.splitext(os.path.basename(f))[0]
        fl.append(file)
        dirname = os.path.dirname(f).split('/')[-1]

        # Split boundaries, aligned to 64-row blocks:
        #   train    -> first 98%, test -> last 1%, validate -> the 1%
        #   immediately after the training region.
        if mark == 'test':
            start = int(((rows * 0.99) // 64) * 64)
            row = int((rows * 0.01) // 64)
            if start % 64 != 0:
                start = ((start // 64) + 1) * 64
            end = int(start + row * 64)
        elif mark == 'train':
            print('get type:%s' % 'train')
            row = int((rows * 0.98) // 64)
            end = int(row * 64)
        elif mark == 'validate':
            print('get type:%s,datatype:%s' % ('validate', dirname))
            row = int((rows * 0.01) // 64)
            start = int(((rows * 0.98) // 64) * 64)
            if start % 64 != 0:
                start = int(((start // 64) + 1) * 64)
            end = int(start + row * 64)

        if hacking:
            # Drop the trailing label column (22nd) of the hacking data.
            data = data[start:end, :-1].reshape((-1, 21))
            if mark in ('validate', 'test'):
                # Pair the normal slice with an equally sized attack slice.
                url = os.path.dirname(f) + '/pure_attack.pkl'
                atta = pd.read_pickle(url, compression='zip')
                atta = pd.DataFrame(atta).to_numpy().reshape((-1, 64, 22))
                print('{},shape:{}'.format('pure_attack', atta.shape), end=',')
                print(
                    'start at:{},%64={},end:{},%64={},acquires row:{},percent:{}%,done read files!!!'
                    .format(start, start % 64, end, end % 64, row,
                            float(row / atta.shape[0])))
                atta = atta[:row, :, :21]

            if i > 0:
                data2 = np.concatenate((data2, data), axis=0).reshape((-1, 21))
                atta2 = np.concatenate((atta2, atta), axis=0)
            else:
                data2 = data
                atta2 = atta
        else:
            data = data[start:end, :].reshape((-1, 21))
            if i > 0:
                data2 = np.concatenate((data2, data), axis=0).reshape((-1, 21))
            else:
                data2 = data
        print('{} shaped:{},trunked:{}'.format(file, data1.shape, data.shape),
              end=',')
        print('get|all:{}|{},blocks:{}'.format(row, row1, row % 64), end=',')
        print(
            'start at:{},%64={},end:{},%64={},percent:{}%,done read files!!!'.
            format(start, start % 64, end, end % 64, float(row / row1)))

    if mark in ('validate', 'test'):
        # Mixed evaluation set: normal blocks labelled 0, attack blocks 1.
        atta2 = atta2.reshape((-1, 64, 21))
        data2 = data2.reshape((-1, 64, 21))
        label1 = np.ones((atta2.shape[0], 1))
        label0 = np.zeros((data2.shape[0], 1))

        data2 = np.concatenate((data2, atta2), axis=0)
        labels = np.concatenate((label0, label1), axis=0)

        TraindataM = torch.from_numpy(
            data2).float()  # transform to float torchTensor
        TraindataM = torch.unsqueeze(TraindataM, 1)
        Traindata_LabelM = torch.from_numpy(labels).float()
        TorchDataset = Data.TensorDataset(TraindataM, Traindata_LabelM)

        print('{},size:{} label:{},done read files!!!\n'.format(
            'validate mix dataset', TraindataM.shape, label))
        return Data.DataLoader(dataset=TorchDataset,
                               batch_size=BATCH_SIZE,
                               shuffle=True)

    # Training path: reshape into (N, 1, 64, 21) float blocks.
    TraindataM = torch.from_numpy(data2.reshape(
        (-1, 64, 21))).float()  # transform to float torchTensor
    TraindataM = torch.unsqueeze(TraindataM, 1)

    if label:
        # Uniform label: 0 for the 'Normal' dataset, 1 otherwise.
        if select == 'Normal':
            labels = np.zeros((TraindataM.shape[0], 1))
        else:
            labels = np.ones((TraindataM.shape[0], 1))

        Traindata_LabelM = torch.from_numpy(labels).float()
        TorchDataset = Data.TensorDataset(TraindataM, Traindata_LabelM)
        print('{},size:{} label:{},done read files!!!\n'.format(
            fl, TraindataM.shape, label))
        return Data.DataLoader(dataset=TorchDataset,
                               batch_size=BATCH_SIZE,
                               shuffle=True)
    else:
        # Unlabelled loader for plain GAN training.
        TorchDataset = Data.TensorDataset(TraindataM)
        print('{},size:{} label:{},done read files!!!\n'.format(
            fl, TraindataM.shape, label))
        return Data.DataLoader(dataset=TorchDataset,
                               batch_size=BATCH_SIZE,
                               shuffle=True)
예제 #21
0
def get_dataloaders(
        cfg: DictConfig,
        num_workers: int = 4,
        dataset_name: str = "mnist") -> Tuple[DataLoader, DataLoader]:
    """Return training and validation dataloaders.

    Args:
        cfg: Hydra/OmegaConf config; reads ``dirs.data`` plus the
            ``mode.train`` / ``mode.val`` batch-size and shuffle settings.
        num_workers: worker processes for the MNIST loaders (unused for
            the "reunion" branch, which keeps its hard-coded settings).
        dataset_name: ``"mnist"`` (torchvision demo) or ``"reunion"``
            (tensors produced by ``process_uni_data``).

    Returns:
        ``(train_dataloader, val_dataloader)``.

    Raises:
        ValueError: if ``dataset_name`` is not a supported name.
            (Previously this fell through to the final ``return`` and
            raised ``UnboundLocalError`` instead.)
    """

    ##################################################################
    # Change this. Demo dataset is MNIST
    ##################################################################
    if dataset_name == "mnist":
        data_transform = Compose(
            [ToTensor(), Normalize((0.1307, ), (0.3081, ))])

        # NOTE(review): train and val share the same (training) dataset
        # here -- presumably intentional for the demo; confirm before
        # using this for real validation.
        dataset = torchvision.datasets.MNIST(
            os.path.join(hydra.utils.get_original_cwd(), cfg.dirs.data),
            download=True,
            transform=data_transform,
        )

        train_dataloader = DataLoader(
            dataset,
            batch_size=cfg.mode.train.batch_size,
            shuffle=cfg.mode.train.shuffle,
            num_workers=num_workers,
        )

        val_dataloader = DataLoader(
            dataset,
            batch_size=cfg.mode.val.batch_size,
            shuffle=cfg.mode.val.shuffle,
            num_workers=num_workers,
        )

    elif dataset_name == "reunion":

        (
            list_of_training_inputs,
            training_target_df,
            list_of_testing_inputs,
            testing_target_df,
        ) = process_uni_data(cfg)

        # torch.autograd.Variable has been a no-op wrapper since
        # PyTorch 0.4, so the tensors are used directly.
        inputs_train = torch.FloatTensor(list_of_training_inputs)
        targets_train = torch.FloatTensor(training_target_df)
        inputs_test = torch.FloatTensor(list_of_testing_inputs)
        targets_test = torch.FloatTensor(testing_target_df)

        training_samples = utils_data.TensorDataset(inputs_train,
                                                    targets_train)

        train_dataloader = utils_data.DataLoader(training_samples,
                                                 batch_size=200,
                                                 drop_last=False,
                                                 shuffle=False)

        validation_samples = utils_data.TensorDataset(inputs_test,
                                                      targets_test)

        val_dataloader = utils_data.DataLoader(validation_samples,
                                               batch_size=200,
                                               drop_last=False,
                                               shuffle=False)

    else:
        # Fail fast with a clear message instead of an UnboundLocalError
        # at the return statement below.
        raise ValueError(f"Unknown dataset_name: {dataset_name!r}")

    return train_dataloader, val_dataloader
예제 #22
0
def read_dataset(root_path_=str,
                 target_type=str,
                 read_target=str,
                 usage=str,
                 res_num=int,
                 res_type='dataloader',
                 selected=None,
                 bias_dataset=str):  #,label=True
    """
    func:read dataset to nets,get data to Nets,satisfied multifunction

    Reads every (or a selected subset of) sub-folder under ``root_path_``
    in parallel via ``base_read`` and packages the results either as raw
    arrays (``res_type='seperate'``) or as one / several torch DataLoaders.

    NOTE(review): the defaults ``str``/``int`` in the signature are the
    *type objects*, not annotations or usable defaults -- callers are
    expected to pass every argument explicitly.

    :param root_path_:basedir of dataset
    :param target_type: 'csv','pkl','txt'
    :param read_target:'all','select'
    :param usage: 'train','validate','test','coding'
    :param res_num:refine how many result return for call
    :param label: default dataloarder with label
    :param res_type: default 'dataloader'
    :param selected:if read_target== select,select the selected to read,could be 'Dos Fuzzy,RPM,gear' dataset file name
    :param bias_dataset: whether a requirement of dataset is only normal or attack,it could be 'Normal','Attack','Both'
    :return: list of dataloarder or single dataloarder contained all read dataset
    """
    print(
        '-----------------------------------%s,%s-----------------------------'
        % (read_dataset.__name__, usage))
    print('data address:{}, sub-dataset:{}'.format(root_path_,
                                                   os.listdir(root_path_)))
    # selected attack type to read data
    # Normalize the user-supplied names with ``title`` so they compare
    # equal to ``f.title()`` below.
    if selected != None:
        selected = list(map(title, selected))
    if read_target == 'all':
        files = [os.path.join(root_path_, f) for f in os.listdir(root_path_)]
    elif read_target == 'select':
        files = [
            os.path.join(root_path_, f) for f in os.listdir(root_path_)
            if f.title() in selected
        ]
    else:
        # NOTE(review): on a bad ``read_target`` this only prints;
        # ``files`` stays unbound and the loop below raises NameError.
        print('func read_dataset: arise error at param read_target')

    # Sanity pass: warn about sub-folders that contain no file whose name
    # includes ``target_type`` (the parallel read below would yield nothing).
    # dataset_urls = []
    results = []
    for file in files:
        flag = 0
        try:
            for i in os.listdir(file):
                if target_type in i:
                    flag += 1
                    pass
            if flag == 0:
                print('{} has not {} file'.format(file, target_type),
                      'please check the file folder')
                print(os.listdir(file))
                # dataset_urls.append(os.path.join(file,i))
        except:
            # e.g. ``file`` is not a directory; abort the whole read.
            print(files, '\n error for target file folder')
            return
    # One worker process per sub-folder; each worker runs ``base_read``.
    pool = mp.Pool(processes=len(files))

    for i in files:
        # print(i)
        # results.append(pool.apply(testdata,(os.path.join(path,i),mark,)))#_async
        results.append(
            pool.apply_async(base_read, (
                i,
                usage,
                target_type,
                bias_dataset,
            )))  # _async

    pool.close()
    pool.join()

    names = []   # sub-dataset names, one per worker result
    flags = []   # labels (flat, or one list per sub-dataset)
    row = 0      # running count of label blocks, for the summary print

    # ``base_read`` results are indexed as (labels, data, name, column);
    # element 3 is the per-frame column count used by every reshape below.
    column = results[0].get()[3]
    # print('column:',column)
    if res_type == 'seperate' or res_num > 1:
        data = []
    else:
        # Placeholder; overwritten by the first worker's data on i == 0.
        data = np.empty((64, column))

    # Only referenced by the commented-out accounting line near the
    # bottom of the loop.
    f2 = lambda x: len(x)
    # ll = 0
    for i, result in enumerate(results):
        # result = result#.get()
        result = result.get()
        # print('i:', i,'file:',result[2])
        # # print('%s,%d'%(result[2],len(flg)))
        # flags.append(result[0])
        # data.append(result[1])
        label_ = []
        # ll = 0
        # Gather this worker's label chunks: either appended individually
        # ('seperate') or flattened into one list per sub-dataset.
        for flg in result[0]:
            # print(flg.__class__,len(flg))
            row += len(flg)

            if res_type == 'seperate':
                flags.append(flg)
            else:
                label_.extend(flg)
                #  older codes
                # if ll == 0 and i == 0:
                #     flags = flg
                #     # print('fg:',flg,flg.__class__)
                #     ll+=1
                # else:
                #     # print('flags:',flags)
                #     flags.extend(flg)
                # # print('%s,%d'%(result[2],len(flg)))
        # concat normal status and attack status or single data suche as normal status or attack status
        # of all types of attack to one container
        # concat all to one container
        if res_num == 1:
            flags.extend(label_)
        # concat to res_num containers,res_num default equal to the number of target_type
        else:
            flags.append(label_)

        # Same gathering for the data chunks; ``la`` marks whether ``dat``
        # still holds its uninitialized placeholder.
        la = 0
        dat = np.empty((1, 64, column))
        for dt in result[1]:
            if res_type == 'seperate':
                data.append(
                    np.array(dt).astype(np.float64).reshape((-1, 64, column)))
            else:
                dt = np.array(dt).reshape((-1, 64, column)).astype(np.float64)
                if la == 0:
                    dat = dt
                    la += 1
                else:
                    dat = np.concatenate((dat, dt), axis=0).reshape(
                        (-1, 64, column))
                # older codes
                # if la == 0 and i == 0:
                #     data = dt
                #     la += 1
                # else:
                #     data = np.concatenate((data,dt)).reshape((-1,64,column))
        # concat normal status and attack status or single data suche as normal status or attack status
        # of all types of attack to one container
        # concat all to one container
        if res_num == 1:
            if i == 0:
                data = dat
            else:
                data = np.concatenate((data, dat)).reshape((-1, 64, column))
        # concat to res_num containers,res_num default equal to the number of target_type
        else:
            data.append(dat)
        names.append(result[2])
        # row += sum(list(map(f2,result[0])))
    print('-' * 20, 'total result', '-' * 20)
    print(
        '\n return {} blocks of data,{} blocks of label,all {} blocks'.format(
            data.__len__(), len(flags), row),
        end=',')

    # Raw-array mode: hand back the pieces without building DataLoaders.
    if res_type == 'seperate':
        return data, flags, names
    print(names, end=',')
    if res_num == 1:
        # Single DataLoader over everything, with a channel axis added
        # via unsqueeze -> (N, 1, 64, column).
        # data_array = np.array(data).reshape((-1,64.21))
        data_array = data
        labels = np.array(flags).reshape((-1, 1)).astype(np.float64)
        TraindataM = torch.from_numpy(
            data_array).float()  # transform to float torchTensor
        TraindataM = torch.unsqueeze(TraindataM, 1)
        Traindata_LabelM = torch.from_numpy(labels).float()
        TorchDataset = Data.TensorDataset(TraindataM, Traindata_LabelM)
        dataloader = Data.DataLoader(dataset=TorchDataset,
                                     batch_size=BATCH_SIZE,
                                     shuffle=True)
        print(
            'return one dataloarder with {} tensors,data shape:{},label shape:{}'
            .format(len(dataloader.dataset.tensors),
                    dataloader.dataset.tensors[0].size(),
                    dataloader.dataset.tensors[1].size()))
        print(
            '------------------------------------------------------------------'
        )
        return dataloader, names
    else:
        # One DataLoader per accumulated (label, data) pair.
        print('result len:', len(data), len(flags))
        dataloaders = []
        f1 = lambda x: x.dataset.tensors[0].size()
        # for i,label,dat in enumerate(list(zip(flags,data))):
        for label, dat in list(zip(flags, data)):
            dat = np.array(dat).reshape((-1, 64, column))
            label = np.array(label).reshape((-1, 1))
            TraindataM = torch.from_numpy(
                dat).float()  # transform to float torchTensor
            TraindataM = torch.unsqueeze(TraindataM, 1)
            Traindata_LabelM = torch.from_numpy(label).float()
            TorchDataset = Data.TensorDataset(TraindataM, Traindata_LabelM)
            dataloaders.append(
                Data.DataLoader(dataset=TorchDataset,
                                batch_size=BATCH_SIZE,
                                shuffle=True))

        print(
            'return list of dataloader has {} dataloarders,data shape respectively:{}'
            .format(len(dataloaders), list(map(f1, dataloaders))))
        print(
            '------------------------------------------------------------------'
        )
        return dataloaders, names
예제 #23
0
# Move the train/test arrays to the GPU and scale pixel values into [0, 1].
train_x = torch.from_numpy(train_x).type(torch.FloatTensor).cuda()
train_x = train_x / 255.0
train_y = torch.from_numpy(train_y).type(torch.int64).cuda()

test_x = torch.from_numpy(test_x).type(torch.FloatTensor).cuda()
test_x = test_x / 255.0
test_y = torch.from_numpy(test_y).type(torch.int64).cuda()

# 784 inputs -> 10 classes; presumably flattened 28x28 MNIST images --
# confirm against the upstream data preparation.
num_inputs = 784
num_outputs = 10

batch_size = 32

# Pair the training features with their labels sample-wise.
dataset = Data.TensorDataset(train_x, train_y)

# Wrap the dataset in a DataLoader.
data_iter = Data.DataLoader(
    dataset=dataset,  # torch TensorDataset format
    batch_size=batch_size,  # mini batch size
    shuffle=True,  # shuffle the data (recommended)
    num_workers=0,  # worker processes; >0 requires running under the if __name__ == '__main__' guard
)
# num_workers=0 means no extra worker processes are used to read the data

# A single linear layer: multinomial logistic regression (the softmax is
# folded into CrossEntropyLoss below).
net = nn.Sequential(nn.Linear(num_inputs, num_outputs)).cuda()

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
예제 #24
0
# Windows paths for the MFCC test data and the serialized model.
testDIR = "D:/TestWithMFCC39/small data sets in the form of npy/test"
modelDIR = "D:/TestWithMFCC39/small data sets in the form of npy"
modelFILE = "reload_model.pth"

# Build the training set
trainDS = np.load(train_npy_FILE)
#print("The shape of trainDS is {}".format(trainDS.shape))
# Keep the first TIME_STEP rows and add a leading batch axis of size 1.
trainDS = trainDS[np.newaxis, 0:TIME_STEP, :]
#print("The shape of trainDS is {}".format(trainDS.shape))

# Column 0 is used as the target; the remaining columns are the features.
x_train = trainDS[:, :, 1:]
y_train = trainDS[:, :, 0]
y_train = torch.from_numpy(y_train).float()
x_train = torch.from_numpy(x_train).float()

trainDataSet = Data.TensorDataset(x_train, y_train)

trainLoader = Data.DataLoader(
    dataset=trainDataSet,  # torch TensorDataset format
    batch_size=BATCH_SIZE,  # mini batch size
    shuffle=True,  # shuffle the data (recommended)
    #num_workers=1,  # worker processes for reading data
)

# Build the training set
# NOTE(review): this reloads and re-slices the same file as above
# (x_train is recomputed below) -- looks like a duplicated block; confirm
# whether it is intentional.
trainDS = np.load(train_npy_FILE)
print("The shape of trainDS is {}".format(trainDS.shape))
trainDS = trainDS[np.newaxis, 0:TIME_STEP, :]
print("The shape of trainDS is {}".format(trainDS.shape))

x_train = trainDS[:, :, 1:]
예제 #25
0
    trainy = pickle.load(open("./lib/trainR.pkl", 'rb'))
    testx = pickle.load(open("./lib/testD.pkl", 'rb'))
    testy = pickle.load(open("./lib/testR.pkl", 'rb'))
    return trainx, trainy, testx, testy


if __name__ == "__main__":
    # Trans_net = nn.Linear(10,1)
    # Build the RNN and move it to the configured device.
    model = RNN(input_size, hidden_size, num_layers).to(device)
    # Load raw train/test splits; ``trans`` returns three tensors per
    # split (the third is named std* -- presumably a scale/std tensor,
    # confirm against its definition).
    trainx, trainy, testx, testy = get_data()
    train1, train2, std1 = trans(trainx, trainy)
    test1, test2, std2 = trans(testx, testy)
    print(train1.shape)
    print(std1.shape)

    # Each sample is an (input, target, std) triple; sample order is
    # preserved (shuffle=False) in both loaders.
    train_set = Data.TensorDataset(train1, train2, std1)
    train_loader = Data.DataLoader(dataset=train_set,
                                   batch_size=batch_size,
                                   shuffle=False,
                                   num_workers=0)
    test_set = Data.TensorDataset(test1, test2, std2)
    test_loader = Data.DataLoader(dataset=test_set,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=0)
    # std = torch.Tensor([[3.8],[4.6]])

    # Decay the learning rate by 10x at epochs 50 and 150.
    milestone_list = [50, 150]
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    lr_scheduler = MultiStepLR(optimizer, milestones=milestone_list, gamma=0.1)
예제 #26
0
import numpy as np
import torch
from torch.utils import data

# Load the expert demonstrations (observations and matching actions) and
# expose them as shuffled mini-batches of 50 samples.
dataset = np.load('./expert.npz')
tensor_dataset = data.TensorDataset(
    torch.Tensor(dataset['obs']),
    torch.Tensor(dataset['action']),
)
dataloader = data.DataLoader(tensor_dataset, shuffle=True, batch_size=50)


예제 #27
0
File: P4.py  Project: timt51/Fall-2017
# Vectorize the raw training text (dense matrix; presumably count or
# tf-idf vectors -- confirm how VECTORIZER is configured).
TRAIN_X = VECTORIZER.transform(TRAIN_X_RAW).todense()
# Replace (near-)empty rows with all-ones so the row normalization below
# never divides by ~0.
for index, row in enumerate(TRAIN_X):
    if np.sum(row) < 1e-2:
        TRAIN_X[index, :] = np.ones((1, VOCABULARY_SIZE),dtype=np.float32)
TRAIN_X = TRAIN_X / TRAIN_X.sum(axis=1)
# Project the normalized vectors into the embedding space.
TRAIN_X = np.matmul(EMBEDDINGS, TRAIN_X.T)

# NOTE(review): dev/test skip the empty-row guard applied to train; an
# all-zero column here would divide by zero -- confirm inputs are non-empty.
DEV_X = VECTORIZER.transform(DEV_X_RAW).T
DEV_X = DEV_X / DEV_X.sum(axis=0)
DEV_X = np.matmul(EMBEDDINGS, DEV_X)

TEST_X = VECTORIZER.transform(TEST_X_RAW).T
TEST_X = TEST_X / TEST_X.sum(axis=0)
TEST_X = np.matmul(EMBEDDINGS, TEST_X)

# Wrap each split as (features, label) tensor pairs; only training is
# shuffled and drops the last partial batch.
TRAIN_DATA = data_utils.TensorDataset(torch.from_numpy(TRAIN_X.T.astype(np.float32)), torch.from_numpy(TRAIN_Y))
TRAIN_LOADER = data_utils.DataLoader(TRAIN_DATA, batch_size=173,
                                          shuffle=True, num_workers=2, drop_last=True)
DEV_DATA = data_utils.TensorDataset(torch.from_numpy(DEV_X.T.astype(np.float32)), torch.from_numpy(DEV_Y))
DEV_LOADER = data_utils.DataLoader(DEV_DATA, batch_size=1,
                                          shuffle=False, num_workers=2)
TEST_DATA = data_utils.TensorDataset(torch.from_numpy(TEST_X.T.astype(np.float32)), torch.from_numpy(TEST_Y))
TEST_LOADER = data_utils.DataLoader(TEST_DATA, batch_size=1,
                                          shuffle=False, num_workers=2)
##################################################################
# Define the network
##################################################################
class SentimentNet(nn.Module):
    def __init__(self, hidden_dim):
        super(SentimentNet, self).__init__()
        self.linear1 = nn.Linear(EMBEDDING_SIZE, hidden_dim)
예제 #28
0
# One-hot encode the labels (columns follow pandas' sorted label order).
Y_train = pd.get_dummies(bdf['label']).values

print(Y_train)
print("=" * 80)
print(len(X_train))
print(len(Y_train))

# rf.fit(X_train, Y_train)
# Y_test = rf.predict(X_test)
# result = np.argmax(Y_test, axis = 1)

# import tensorflow as tf
import torch
import torch.utils.data as Data

# Pair features and one-hot labels as float64 tensors and serve them in
# shuffled mini-batches of BATCH_SIZE.
torch_dataset = Data.TensorDataset(torch.from_numpy(X_train).double(), 
                                   torch.from_numpy(Y_train).double())
train_loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)


import torch.nn as nn
from math import sqrt
import torch

class NNmodel(nn.Module):
    def __init__(self):
        super().__init__()
        self.W = nn.Parameter(torch.Tensor(1, 12))
예제 #29
0
# _*_ coding: utf-8 _*_
__author__ = 'LelandYan'
__date__ = '2019/7/21 16:10'

import torch
import torch.utils.data as Data

# Samples drawn per mini-batch.
BATCH_SIZE = 8

# Toy data: x runs 1..10 while y runs 10..1 (so x + y == 11 elementwise),
# which makes it easy to see how DataLoader pairs and shuffles samples.
x = torch.linspace(1, 10, 10)
y = torch.linspace(10, 1, 10)

# Pair x and y sample-wise and serve them in shuffled mini-batches.
torch_dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(dataset=torch_dataset, shuffle=True, batch_size=BATCH_SIZE, num_workers=2)

if __name__ == '__main__':
    # num_workers=2 spawns worker processes, so iteration must stay under
    # the __main__ guard on spawn-based platforms.
    # (Removed a stray no-op ``pass`` statement that preceded this loop.)
    for epoch in range(3):
        for step, (batch_x, batch_y) in enumerate(loader):
            print("Epoch: ", epoch, "| Step: ", step, "| batch x: ", batch_x.numpy(), "| batch_y: ", batch_y.numpy())
# In[47]:

# Build the recurrent model with its loss and optimizer. (RNN, input_size,
# hidden_size, num_layers, num_classes, learning_rate are defined in
# earlier notebook cells.)
model = RNN(input_size, hidden_size, num_layers, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# In[54]:

# Load the pre-serialized train/test tensors.
input_train = torch.load("input_train.pt")
_label_train = torch.load("label_train.pt")
_input_test = torch.load("input_test.pt")
_label_test = torch.load("label_test.pt")

# NOTE(review): debugging leftover -- .pop() implies input_train is a
# list, and it discards one element; confirm this is intended.
print(input_train.pop())

# BUG FIX: the original referenced ``_input_train``, which is never
# defined (the variable loaded above is ``input_train``), so this line
# raised NameError.
train = data_utils.TensorDataset(input_train, _label_train)
train_loader = data_utils.DataLoader(train,
                                     batch_size=batch_size,
                                     shuffle=True)
# test = data_utils.TensorDataset(_input_test, _label_test)
# test_loader = data_utils.DataLoader(test, shuffle=True)
# total_step = len(train_loader)
epoch_start = time.time()
loss = 0
all_losses = []
# for epoch in range(num_epochs):
# i is the counter, ith batch, j is the value of batch
# for i,(feature, label) in enumerate(train_loader):
#         feature = feature.reshape(-1, sequence_length, input_size)
#         print (feature.shape)
#         # Forward pass