import torch
from torch import optim
from torch.nn.functional import mse_loss
from torch.utils.data import DataLoader
from tqdm import tqdm

# torch.seed() re-seeds the RNG with a non-deterministic value; use
# torch.manual_seed(<int>) instead if reproducible runs are needed.
torch.seed()

# Hyperparameters
epochs = 1
lr = 1e-3
bs = 75

# Jointly optimize the discriminator, encoder, and embedding.
# Pass lr explicitly rather than relying on Adam's default.
opt = optim.Adam(
    [
        {'params': style_discriminator.parameters()},
        {'params': style_encoder.parameters()},
        {'params': embedding.parameters()},
    ],
    lr=lr,
)
criterion = mse_loss

train_dl = GPUDataLoader(EqualOpDataLoader(train_set, train_label_set, bs=bs), embedding, dev)
valid_dl = GPUDataLoader(EqualOpDataLoader(valid_set, valid_label_set, bs=bs), embedding, dev)


def fit(validate=True):
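    # The original body of fit() is missing from this fragment. What follows
    # is a hypothetical sketch of a standard train/validate loop wired to the
    # setup above (opt, criterion, train_dl, valid_dl, epochs); the batch
    # unpacking and the style_discriminator(style_encoder(xb)) call chain are
    # assumptions, not the author's original implementation.
    for epoch in range(epochs):
        style_encoder.train()
        style_discriminator.train()
        for xb, yb in tqdm(train_dl):
            pred = style_discriminator(style_encoder(xb))
            loss = criterion(pred, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()
        if validate:
            style_encoder.eval()
            style_discriminator.eval()
            with torch.no_grad():
                total, n = 0.0, 0
                for xb, yb in valid_dl:
                    total += criterion(style_discriminator(style_encoder(xb)), yb).item()
                    n += 1
            print(f"epoch {epoch}: valid loss {total / max(n, 1):.4f}")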
# The opening of this block is cut off in the fragment; `model =
# torch.nn.Sequential(` is inferred from the closing paren and the later
# uses of `model`.
model = torch.nn.Sequential(
    torch.nn.Linear(100, 30),
    torch.nn.LeakyReLU(),
    # torch.nn.Linear(30, 10),
    # torch.nn.LeakyReLU(),
    torch.nn.Linear(30, 2),
    # torch.nn.Softplus(),
)

if CUDA:
    lstm = lstm.cuda()
    model = model.cuda()
    emb = emb.cuda()

optim = torch.optim.Adam(lstm.parameters(), lr=1e-3)
model_optim = torch.optim.Adam(model.parameters(), lr=1e-3)
emb_optim = torch.optim.Adam(emb.parameters(), lr=1e-3)


class DyadLSTMDataset(Dataset):
    def __init__(self):
        self.labels = []
        self.words = []
        with open('dyad_dataset_for_lstm.txt') as f:
            for line in f:
                user1, user2, acts, pred1, num_days1, pred2, num_days2 = line.strip().split('\t')
                acts = json.loads(acts)
                pred1 = float(pred1)
                pred2 = float(pred2)
                # Keep only acts with a non-empty word list (a[3]); the label
                # pair is ordered according to the act's first field.
                labels = [[pred1, pred2] if a[0] == 1 else [pred2, pred1]
                          for a in acts if len(a[3]) > 0]
                words = [a[3] for a in acts if len(a[3]) > 0]
                self.labels += labels
                self.words += words
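    # A Dataset subclass also needs __len__ and __getitem__; the original
    # fragment cuts off before them, so the two methods below are a minimal
    # sketch of that interface, not the author's code.
    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.words[idx], self.labels[idx]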
import logging
from pathlib import Path
from typing import Dict, Optional

import torch
from torch import Tensor, nn
from torch.nn import Embedding

logger = logging.getLogger(__name__)

# Assumed mapping from the rnn_style config string to the torch RNN class;
# the original definition is not part of this fragment.
RNN_CLASS_MAPPING = {"RNN": nn.RNN, "GRU": nn.GRU, "LSTM": nn.LSTM}


class BaseModel(nn.Module):
    @classmethod
    def load_weights(cls, config: Dict, weights_path: Path):
        logger.info(f"Loading weights from {weights_path}")
        state_dict = torch.load(weights_path)
        return cls.from_state_dict(config, state_dict)

    @classmethod
    def from_checkpoint(cls, config: Dict, checkpoint_path: Path):
        logger.info(f"Loading checkpoint from {checkpoint_path}")
        checkpoint = torch.load(checkpoint_path)
        return cls.from_state_dict(config, checkpoint["model_state_dict"])

    @classmethod
    def from_state_dict(cls, config: Dict, state_dict: Dict):
        model = cls(**config)
        model.load_state_dict(state_dict)
        return model

    def __init__(
        self,
        num_embeddings: int = 1024,
        embedding_dim: int = 128,
        embedding_initial_weights: Optional[Tensor] = None,
        freeze_embedding: bool = False,
        rnn_style: str = "LSTM",
        rnn_num_layers: int = 1,
        hidden_dim: int = 128,
        bidirectional: bool = False,
    ):
        super().__init__()
        self.embedding = Embedding(num_embeddings, embedding_dim)
        if embedding_initial_weights is not None:
            self.embedding.load_state_dict({"weight": embedding_initial_weights})
        if freeze_embedding:
            for param in self.embedding.parameters():
                param.requires_grad = False
        self.rnn = RNN_CLASS_MAPPING[rnn_style](
            embedding_dim,
            hidden_dim,
            rnn_num_layers,
            bidirectional=bidirectional,
            batch_first=True,
        )
        num_directions = 2 if bidirectional else 1
        self.rnn_output_dim = num_directions * hidden_dim
        self.hidden_state_dim = rnn_num_layers * num_directions * hidden_dim

    def forward(self, input_ids: Tensor):
        """
        Arguments
            input_ids: torch.LongTensor of shape (BS, L)

        Returns
            output: torch.FloatTensor of shape (BS, L, num_directions*hidden_dim)
            hidden_state: torch.FloatTensor of shape (BS, rnn_num_layers*num_directions, hidden_dim)
        """
        embedded = self.embedding(input_ids)
        output, hidden_state = self.rnn(embedded)
        # nn.LSTM returns the hidden state as an (h_n, c_n) tuple; keep only
        # h_n so the return type is the same for every rnn_style.
        if isinstance(hidden_state, tuple):
            hidden_state = hidden_state[0]
        # (num_layers*num_directions, BS, hidden_dim) -> batch-first
        hidden_state = hidden_state.permute(1, 0, 2)
        return output, hidden_state

    def save_weights(self, weights_path: Path):
        logger.info(f"Saving model weights to {weights_path}")
        torch.save(self.state_dict(), weights_path)
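# A quick usage sketch for BaseModel; the config values and file name here
# are illustrative, not taken from the original project.
if __name__ == "__main__":
    config = {
        "num_embeddings": 1024,
        "embedding_dim": 128,
        "rnn_style": "LSTM",
        "hidden_dim": 128,
        "bidirectional": True,
    }
    model = BaseModel(**config)
    input_ids = torch.randint(0, 1024, (4, 12))  # (BS=4, L=12)
    output, hidden = model(input_ids)
    print(output.shape)  # torch.Size([4, 12, 256]): num_directions*hidden_dim
    print(hidden.shape)  # torch.Size([4, 2, 128]): rnn_num_layers*num_directions
    model.save_weights(Path("base_model.pt"))
    reloaded = BaseModel.load_weights(config, Path("base_model.pt"))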
from torch.nn import Embedding
from torch_geometric.nn import Set2Set

from model import ff, MegNetLayer, ff_output

net = Embedding(95, 16)
print(sum(p.numel() for p in net.parameters()))

net = ff(2)
print(sum(p.numel() for p in net.parameters()))

net = MegNetLayer()
print(sum(p.numel() for p in net.parameters()))

net = ff(32)
print(sum(p.numel() for p in net.parameters()))

net = Set2Set(32, processing_steps=3)  # keras: 9376
print(sum(p.numel() for p in net.parameters()))

net = ff_output(160, 200)
print(sum(p.numel() for p in net.parameters()))
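# The repeated parameter-count expression could be factored into a helper.
# A minimal sketch; count_params is a hypothetical name, not from the repo.
def count_params(module):
    """Return the total number of parameters in a module."""
    return sum(p.numel() for p in module.parameters())

# Example: count_params(Set2Set(32, processing_steps=3)) gives the same
# value as the Set2Set check above.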