def __init__(self,
             input_size: int,
             hidden_size: int,
             num_layers: int,
             recurrent_dropout_probability: float = 0.0,
             use_highway: bool = True) -> None:
    super(StackedBidirectionalLstm, self).__init__()

    # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bidirectional = True

    layers = []
    lstm_input_size = input_size
    for layer_index in range(num_layers):
        forward_layer = AugmentedLstm(lstm_input_size, hidden_size,
                                      go_forward=True,
                                      recurrent_dropout_probability=recurrent_dropout_probability,
                                      use_highway=use_highway,
                                      use_input_projection_bias=False)
        backward_layer = AugmentedLstm(lstm_input_size, hidden_size,
                                       go_forward=False,
                                       recurrent_dropout_probability=recurrent_dropout_probability,
                                       use_highway=use_highway,
                                       use_input_projection_bias=False)
        lstm_input_size = hidden_size * 2
        self.add_module('forward_layer_{}'.format(layer_index), forward_layer)
        self.add_module('backward_layer_{}'.format(layer_index), backward_layer)
        layers.append([forward_layer, backward_layer])
    self.lstm_layers = layers

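# Hedged usage sketch for the constructor above: it mirrors AllenNLP's own
# StackedBidirectionalLstm, and the in-code comment notes it is meant to be
# wrapped in a PytorchSeq2SeqWrapper. The import paths and the bool mask below
# assume a recent AllenNLP release; the sizes are illustrative only.
import torch
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.stacked_bidirectional_lstm import StackedBidirectionalLstm

encoder = PytorchSeq2SeqWrapper(
    StackedBidirectionalLstm(input_size=50, hidden_size=25, num_layers=2))
inputs = torch.randn(4, 7, 50)             # (batch, timesteps, input_size)
mask = torch.ones(4, 7, dtype=torch.bool)  # older releases used a 0/1 LongTensor mask
outputs = encoder(inputs, mask)            # (4, 7, 2 * hidden_size) == (4, 7, 50)
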
def __init__(
    self,
    lstm_hidden_size: int = None,
    input_dropout_rate: float = None,
    input_size: int = None,
    use_highway: bool = False,
):
    super().__init__()
    self.lstm_hidden_size = lstm_hidden_size
    self.input_dropout_rate = input_dropout_rate
    self.input_size = input_size
    self.use_highway = use_highway

    self.lstm_forward = AugmentedLstm(
        self.input_size,
        self.lstm_hidden_size,
        go_forward=True,
        recurrent_dropout_probability=self.input_dropout_rate,
        use_highway=self.use_highway,
        use_input_projection_bias=False,
    )
    self.lstm_backward = AugmentedLstm(
        self.input_size,
        self.lstm_hidden_size,
        go_forward=False,
        recurrent_dropout_probability=self.input_dropout_rate,
        use_highway=self.use_highway,
        use_input_projection_bias=False,
    )

def __init__(self, input_size: int, hidden_size: int, num_layers: int, dropout: float):
    super(LstmbiLm, self).__init__(stateful=False)
    self.hidden_size = hidden_size
    self.cell_size = hidden_size

    forward_layers = []
    backward_layers = []
    lstm_input_size = input_size
    for layer_index in range(num_layers):
        forward_layer = AugmentedLstm(
            input_size=lstm_input_size,
            hidden_size=hidden_size,
            go_forward=True,
            recurrent_dropout_probability=dropout)
        backward_layer = AugmentedLstm(
            input_size=lstm_input_size,
            hidden_size=hidden_size,
            go_forward=False,
            recurrent_dropout_probability=dropout)
        lstm_input_size = hidden_size

        self.add_module('forward_layer_{}'.format(layer_index), forward_layer)
        self.add_module('backward_layer_{}'.format(layer_index), backward_layer)
        forward_layers.append(forward_layer)
        backward_layers.append(backward_layer)
    self.forward_layers = forward_layers
    self.backward_layers = backward_layers

def __init__(self, vocab, use_postags_only=True, embed_dim=100, hidden_size=200,
             recurrent_dropout_probability=0.3, use_highway=False, maxpool=True):
    super(BLSTMModel, self).__init__()
    self.embeds = Embedding.from_params(
        vocab, Params({'vocab_namespace': 'pos' if use_postags_only else 'tokens',
                       'embedding_dim': embed_dim,
                       'trainable': True,
                       'padding_index': 0,
                       'pretrained_file': None if use_postags_only else
                       'https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.100d.txt.gz',
                       }))
    self.binary_feature_embedding = Embedding(2, embed_dim)

    self.fwd_lstm = PytorchSeq2SeqWrapper(AugmentedLstm(
        input_size=embed_dim * 2, hidden_size=hidden_size, go_forward=True,
        recurrent_dropout_probability=recurrent_dropout_probability,
        use_input_projection_bias=False, use_highway=use_highway), stateful=False)
    self.bwd_lstm = PytorchSeq2SeqWrapper(AugmentedLstm(
        input_size=embed_dim * 2, hidden_size=hidden_size, go_forward=False,
        recurrent_dropout_probability=recurrent_dropout_probability,
        use_input_projection_bias=False, use_highway=use_highway), stateful=False)

    self.maxpool = maxpool
    self.fc = nn.Linear(hidden_size * 2, 1, bias=False)

def test_augmented_lstm_is_initialized_with_correct_biases(self):
    lstm = AugmentedLstm(2, 3)
    true_state_bias = numpy.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    numpy.testing.assert_array_equal(lstm.state_linearity.bias.data.numpy(), true_state_bias)

    # Non-highway case.
    lstm = AugmentedLstm(2, 3, use_highway=False)
    true_state_bias = numpy.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    numpy.testing.assert_array_equal(lstm.state_linearity.bias.data.numpy(), true_state_bias)

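# Hedged note on the layout the bias test above relies on: in AllenNLP's
# AugmentedLstm the state_linearity bias is stacked per gate as
# [input_gate | forget_gate | memory_init | output_gate | highway_gate]
# (the highway block disappears when use_highway=False), and only the
# forget-gate slice is initialised to 1. The import path is the assumed
# AllenNLP location of the class.
from allennlp.modules.augmented_lstm import AugmentedLstm

lstm = AugmentedLstm(2, 3)
hidden = lstm.hidden_size
forget_gate_bias = lstm.state_linearity.bias.data[hidden:2 * hidden]
assert bool((forget_gate_bias == 1.0).all())
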
def __init__(
    self,
    input_size: int,
    hidden_size: int,
    num_layers: int,
    recurrent_dropout_probability: float = 0.0,
    use_highway: bool = True,
    use_input_projection_bias: bool = True,
) -> None:
    super().__init__()

    # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    layers = []
    lstm_input_size = input_size
    for layer_index in range(num_layers):
        go_forward = layer_index % 2 == 0
        layer = AugmentedLstm(
            lstm_input_size,
            hidden_size,
            go_forward,
            recurrent_dropout_probability=recurrent_dropout_probability,
            use_highway=use_highway,
            use_input_projection_bias=use_input_projection_bias,
        )
        lstm_input_size = hidden_size
        self.add_module("layer_{}".format(layer_index), layer)
        layers.append(layer)
    self.lstm_layers = layers

def __init__(self,
             input_size: int,
             hidden_size: int,
             num_layers: int,
             recurrent_dropout_probability: float = 0.0,
             use_highway: bool = True) -> None:
    super(StackedAlternatingLstm, self).__init__()

    # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
    self.input_size = input_size
    self.hidden_size = hidden_size

    layers = []
    lstm_input_size = input_size
    for layer_index in range(num_layers):
        go_forward = layer_index % 2 == 0
        layer = AugmentedLstm(
            lstm_input_size, hidden_size, go_forward,
            recurrent_dropout_probability=recurrent_dropout_probability,
            use_highway=use_highway)
        lstm_input_size = hidden_size
        self.add_module('layer_{}'.format(layer_index), layer)
        layers.append(layer)
    self.lstm_layers = layers

def test_augmented_lstm_computes_same_function_as_pytorch_lstm(self):
    augmented_lstm = AugmentedLstm(10, 11)
    pytorch_lstm = LSTM(10, 11, num_layers=1, batch_first=True)
    # Initialize all weights to be == 1.
    initializer = InitializerApplicator(
        [(".*", lambda tensor: torch.nn.init.constant_(tensor, 1.))])
    initializer(augmented_lstm)
    initializer(pytorch_lstm)

    initial_state = torch.zeros([1, 5, 11])
    initial_memory = torch.zeros([1, 5, 11])

    # Use bigger numbers to avoid floating point instability.
    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor * 5.,
                                                                self.sequence_lengths)
    lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)

    augmented_output, augmented_state = augmented_lstm(lstm_input, (initial_state, initial_memory))
    pytorch_output, pytorch_state = pytorch_lstm(lstm_input, (initial_state, initial_memory))
    pytorch_output_sequence, _ = pad_packed_sequence(pytorch_output, batch_first=True)
    augmented_output_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

    numpy.testing.assert_array_almost_equal(pytorch_output_sequence.data.numpy(),
                                            augmented_output_sequence.data.numpy(), decimal=4)
    numpy.testing.assert_array_almost_equal(pytorch_state[0].data.numpy(),
                                            augmented_state[0].data.numpy(), decimal=4)
    numpy.testing.assert_array_almost_equal(pytorch_state[1].data.numpy(),
                                            augmented_state[1].data.numpy(), decimal=4)

def __init__(
    self,
    input_size: int,
    hidden_size: int,
    num_layers: int,
    recurrent_dropout_probability: float = 0.0,
    layer_dropout_probability: float = 0.0,
    use_highway: bool = True,
) -> None:
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bidirectional = True
    self.layer_dropout_probability = layer_dropout_probability

    layers = []
    lstm_input_size = input_size
    for layer_index in range(num_layers):
        forward_layer = AugmentedLstm(
            lstm_input_size,
            hidden_size,
            go_forward=True,
            recurrent_dropout_probability=recurrent_dropout_probability,
            use_highway=use_highway,
            use_input_projection_bias=False,
        )
        lstm_input_size = hidden_size
        self.add_module("forward_layer_{}".format(layer_index), forward_layer)
        layers.append(forward_layer)
    self.lstm_layers = layers
    self.layer_dropout = InputVariationalDropout(layer_dropout_probability)

def __init__(self,
             input_size: int,
             hidden_size: int,
             num_layers: int,
             downsampling: Tuple[int],
             recurrent_dropout_probability: float = 0.0,
             use_highway: bool = True,
             use_input_projection_bias: bool = True) -> None:
    super(StackedLstm, self).__init__()

    # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.downsampling = downsampling
    assert len(self.downsampling) == num_layers

    layers = []
    lstm_input_size = input_size
    for layer_index in range(num_layers):
        go_forward = True
        ds_ratio = self.downsampling[layer_index]
        layer = AugmentedLstm(
            lstm_input_size, hidden_size, go_forward,
            recurrent_dropout_probability=recurrent_dropout_probability,
            use_highway=use_highway,
            use_input_projection_bias=use_input_projection_bias)
        lstm_input_size = hidden_size
        self.add_module('layer_{}'.format(layer_index), layer)
        layers.append(layer)
    self.lstm_layers = layers

def __init__(self, word_embedding_dim, lstm_dim, bidirectional=False,
             use_mu_attention=False, use_self_attention=False, use_yang_attention=False,
             max_pool=False, dropout=0.2):
    super().__init__()
    self.lstm_dim = lstm_dim
    self.emb_dim = word_embedding_dim
    self.lstm = AugmentedLstm(
        word_embedding_dim, lstm_dim, recurrent_dropout_probability=dropout
    )  # nn.LSTM(word_embedding_dim, lstm_dim, 1, bidirectional=bidirectional)

    # yang attention
    self.use_yang_attention = use_yang_attention
    if self.use_yang_attention:
        self.yang_att = YangAttnetion(lstm_dim * 2 if bidirectional else lstm_dim)

    self.use_mu_attention = use_mu_attention
    self.use_self_attention = use_self_attention
    self.max_pool = max_pool

def test_dropout_version_is_different_to_no_dropout(self):
    augmented_lstm = AugmentedLstm(10, 11)
    dropped_augmented_lstm = AugmentedLstm(10, 11, recurrent_dropout_probability=0.9)

    # Initialize all weights to the same constant value (0.5).
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(augmented_lstm)
    initializer(dropped_augmented_lstm)

    initial_state = torch.randn([1, 5, 11])
    initial_memory = torch.randn([1, 5, 11])

    # If the inputs are scaled up as in the PyTorch equivalence test,
    # the dropout has no visible effect, so use the raw tensor here.
    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor,
                                                                self.sequence_lengths)
    lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)

    augmented_output, augmented_state = augmented_lstm(lstm_input, (initial_state, initial_memory))
    dropped_output, dropped_state = dropped_augmented_lstm(lstm_input, (initial_state, initial_memory))
    dropped_output_sequence, _ = pad_packed_sequence(dropped_output, batch_first=True)
    augmented_output_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

    with pytest.raises(AssertionError):
        numpy.testing.assert_array_almost_equal(dropped_output_sequence.data.numpy(),
                                                augmented_output_sequence.data.numpy(), decimal=4)
    with pytest.raises(AssertionError):
        numpy.testing.assert_array_almost_equal(dropped_state[0].data.numpy(),
                                                augmented_state[0].data.numpy(), decimal=4)
    with pytest.raises(AssertionError):
        numpy.testing.assert_array_almost_equal(dropped_state[1].data.numpy(),
                                                augmented_state[1].data.numpy(), decimal=4)

def test_augmented_lstm_works_with_highway_connections(self):
    augmented_lstm = AugmentedLstm(10, 11, use_highway=True)
    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor,
                                                                self.sequence_lengths)
    lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)
    augmented_lstm(lstm_input)

def test_variable_length_sequences_run_backward_return_correctly_padded_outputs(self):
    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor,
                                                                self.sequence_lengths)
    tensor = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)
    lstm = AugmentedLstm(10, 11, go_forward=False)
    output, _ = lstm(tensor)
    output_sequence, _ = pad_packed_sequence(output, batch_first=True)

    numpy.testing.assert_array_equal(output_sequence.data[1, 6:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(output_sequence.data[2, 4:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(output_sequence.data[3, 3:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(output_sequence.data[4, 2:, :].numpy(), 0.0)

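# Hedged sketch of the fixtures the tests above rely on (self.random_tensor and
# self.sequence_lengths come from the test class's setUp, which is not shown here).
# The shapes and lengths are assumptions inferred from the AugmentedLstm(10, 11)
# calls and from the padding assertions in the backward test.
import torch

def setUp(self):
    super().setUp()
    self.random_tensor = torch.rand([5, 7, 10])
    self.sequence_lengths = torch.LongTensor([7, 6, 4, 3, 2])
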
def __init__(self, char_to_index, char_embed_size, hidden_size, output_size,
             dropout, cuda_flag, batch_first=True):
    """
    Args:
        char_to_index: mapping from character to vocabulary index
        char_embed_size: char embeddings dim
        hidden_size: lstm recurrent dim
        output_size: dim of the projected character representation
        dropout: dropout probability
        cuda_flag: whether to run on GPU
        batch_first: batch first option
    """
    super(Char_RNN, self).__init__()
    self.char_to_index = char_to_index
    self.char_embed_size = char_embed_size
    self.hidden_size = hidden_size
    self.dropout = dropout
    self.output_size = output_size
    self.batch_first = batch_first
    self.padding_index = self.char_to_index['__PADDING__']
    self.cuda_flag = cuda_flag

    self.char_encoder = nn.Embedding(len(self.char_to_index), self.char_embed_size,
                                     sparse=True, padding_idx=self.padding_index)
    torch.nn.init.xavier_uniform_(self.char_encoder.weight.data)

    self.char_rnn = AugmentedLstm(
        input_size=self.char_embed_size,
        hidden_size=self.hidden_size,
        go_forward=True,
        recurrent_dropout_probability=self.dropout,
        use_highway=False,
        use_input_projection_bias=False)
    self.char_rnn.state_linearity.bias.data.fill_(0.0)

    self.var_drop = InputVariationalDropout(self.dropout)
    self.w_atten = nn.Linear(self.hidden_size, 1, bias=False)
    self.w_atten.weight.data.fill_(0.0)
    self.char_projection = nn.Linear(self.hidden_size * 2, self.output_size, bias=True)
    self.char_projection.weight.data.fill_(0.0)
    self.char_projection.bias.data.fill_(0.0)
    self.drp = nn.Dropout(self.dropout)

def __init__(
    self,
    input_size: int,
    hidden_size: int,
    num_layers: int,
    recurrent_dropout_probability: float = 0.0,
    use_highway: bool = True,
    use_input_projection_bias: bool = True,
) -> None:
    super().__init__()

    # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    model_dir = "data"
    model_name = "openie"
    device_name = os.getenv("TARGET_DEVICE", default="").upper()
    assert device_name in ("U50LV", "U25"), "TARGET_DEVICE should be U50LV/U25"
    num_sequences = -1
    device_core_id = 0

    self.thread_lstm = []
    self.batch = []
    self.total_batch = 0
    if device_name == "U50LV":
        xmodel = os.path.join("data", "compiled_batch_3.xmodel")
    elif device_name == "U25":
        xmodel = os.path.join("data", "compiled_batch_1.xmodel")
    self.xgraph = xir.Graph.deserialize(xmodel)
    self.model_lstm = vart.Runner.create_runner(self.xgraph.get_root_subgraph(), "run")

    layers = []
    lstm_input_size = input_size
    for layer_index in range(num_layers):
        go_forward = layer_index % 2 == 0
        layer = AugmentedLstm(
            lstm_input_size,
            hidden_size,
            go_forward,
            recurrent_dropout_probability=recurrent_dropout_probability,
            use_highway=use_highway,
            use_input_projection_bias=use_input_projection_bias,
        )
        lstm_input_size = hidden_size
        self.add_module("layer_{}".format(layer_index), layer)
        layers.append(layer)
    self.lstm_layers = layers

def __init__(self, vocab_size, tag_size, X_lengths, embedding_dim, hidden_size,
             recurrent_dropout_probability=0):
    super(BayesianDropoutLSTM, self).__init__()
    self.X_lengths = X_lengths
    self.embedding_layer = nn.Embedding(vocab_size, embedding_dim)
    self.augmented_lstm = AugmentedLstm(
        input_size=embedding_dim,
        hidden_size=hidden_size,
        recurrent_dropout_probability=recurrent_dropout_probability)
    self.fc = nn.Linear(hidden_size, tag_size)

def __init__(
    self,
    input_size: int,
    hidden_size: int,
    num_layers: int,
    recurrent_dropout_probability: float = 0.0,
    use_highway: bool = True,
    use_input_projection_bias: bool = True,
) -> None:
    super().__init__()

    # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    self.thread_lstm = []
    self.batch = []
    self.total_batch = 0
    for i in range(core):
        self.thread_lstm.append(dpu4rnn_py.dpu4rnn.create("openie", i))
        self.batch.append(self.thread_lstm[i].getBatch())
        self.total_batch += self.batch[i]

    layers = []
    lstm_input_size = input_size
    for layer_index in range(num_layers):
        go_forward = layer_index % 2 == 0
        layer = AugmentedLstm(
            lstm_input_size,
            hidden_size,
            go_forward,
            recurrent_dropout_probability=recurrent_dropout_probability,
            use_highway=use_highway,
            use_input_projection_bias=use_input_projection_bias,
        )
        lstm_input_size = hidden_size
        self.add_module("layer_{}".format(layer_index), layer)
        layers.append(layer)
    self.lstm_layers = layers

    with open("model/openie.json", 'r') as load_f:
        load_dict = json.load(load_f)
    in_pos = load_dict[0]['lstm_in_float2fix']
    out_pos = load_dict[0]['lstm_out_fix2float']
    self.input_scale = 2.0 ** in_pos
    self.output_scale = 2.0 ** out_pos

def __init__(
    self,
    input_size: int,
    hidden_size: int,
    go_forward: bool = True,
    recurrent_dropout_probability: float = 0.0,
    use_highway: bool = True,
    use_input_projection_bias: bool = True,
) -> None:
    module = AugmentedLstm(
        input_size=input_size,
        hidden_size=hidden_size,
        go_forward=go_forward,
        recurrent_dropout_probability=recurrent_dropout_probability,
        use_highway=use_highway,
        use_input_projection_bias=use_input_projection_bias,
    )
    super().__init__(module=module)

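# Hedged usage sketch: the constructor above builds an AugmentedLstm and hands it
# to super().__init__(module=module), i.e. the enclosing class is assumed to be a
# PytorchSeq2SeqWrapper subclass along the lines of AllenNLP's
# AugmentedLstmSeq2SeqEncoder (the import below assumes that AllenNLP class).
# Once wrapped, the encoder takes a padded (batch, timesteps, dim) tensor plus a
# mask instead of a PackedSequence.
import torch
from allennlp.modules.seq2seq_encoders import AugmentedLstmSeq2SeqEncoder

encoder = AugmentedLstmSeq2SeqEncoder(input_size=10, hidden_size=11)
inputs = torch.randn(3, 6, 10)
mask = torch.ones(3, 6, dtype=torch.bool)
outputs = encoder(inputs, mask)  # (3, 6, 11)
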
def test_dropout_is_not_applied_to_output_or_returned_hidden_states(self):
    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor,
                                                                self.sequence_lengths)
    tensor = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)
    lstm = AugmentedLstm(10, 11, recurrent_dropout_probability=0.5)

    output, (hidden_state, _) = lstm(tensor)
    output_sequence, _ = pad_packed_sequence(output, batch_first=True)

    # Test the returned output sequence: if any hidden dimension is zero across
    # all timesteps, dropout has been applied to the output of the LSTM.
    num_hidden_dims_zero_across_timesteps = ((output_sequence.sum(1) == 0).sum()).item()
    assert not num_hidden_dims_zero_across_timesteps

    # Dropout should not be applied to the last hidden state, as it is not used
    # within the LSTM. This keeps the behaviour consistent with `torch.nn.LSTM`,
    # which does not apply dropout to any of its outputs, and with the Keras
    # LSTM implementation.
    hidden_state = hidden_state.squeeze()
    num_hidden_dims_zero_across_timesteps = ((hidden_state == 0).sum()).item()
    assert not num_hidden_dims_zero_across_timesteps

def __init__(self, config, L):
    super().__init__()

    # Setting up the embedding.
    if "vocab_dim" not in config:
        config["vocab_dim"] = L.shape[0]
    assert (config["vocab_dim"], config["embed_dim"]) == L.shape

    # 1. Set up featurization
    self.L = nn.Embedding(config["vocab_dim"], config["embed_dim"])
    self.L.weight.data = torch.from_numpy(L)
    self.L.requires_grad = config["update_L"]
    self.L_ = nn.Embedding(config["feat_dim"], config["embed_dim"])
    nn.init.normal_(self.L_.weight)
    self.L_.requires_grad = config["update_L_"]

    input_feature_dim = \
        config["n_features_fixed"] * config["embed_dim"] + \
        config["n_features_learn"] * config["embed_dim"] + \
        config["n_features_exact"]

    # 2. Embed layer
    if config["embed_layer"] == "conv":
        self.embed_C0 = nn.Conv1d(input_feature_dim, config['hidden_dim'], 3, stride=1, padding=1)
        self.embed_Cn = torch.nn.ModuleList([
            nn.Conv1d(config['hidden_dim'], config['hidden_dim'], 3, stride=1, padding=1)
            for _ in range(config["n_layers"])])
    elif config["embed_layer"] == "none":
        assert _check_logistic(config)
    elif config["embed_layer"] == "lstm":
        # (//2 because bidirectional)
        self.embed_h0 = Variable(torch.Tensor(2 * config["n_layers"], config["hidden_dim"] // 2))
        self.embed_c0 = Variable(torch.Tensor(2 * config["n_layers"], config["hidden_dim"] // 2))
        self.embed_W = nn.LSTM(input_feature_dim, config["hidden_dim"] // 2,
                               num_layers=config["n_layers"],
                               dropout=config["rdropout"],
                               bidirectional=True,
                               batch_first=True,
                               )
        for param in self.embed_W.parameters():
            if len(param.size()) > 1:
                nn.init.orthogonal_(param)
    elif config["embed_layer"] == "alstm":
        # (//2 because bidirectional)
        self.embed_h0 = Variable(torch.Tensor(2 * config["n_layers"], config["hidden_dim"] // 2))
        self.embed_c0 = Variable(torch.Tensor(2 * config["n_layers"], config["hidden_dim"] // 2))
        self.embed_Wn = torch.nn.ModuleList([
            AugmentedLstm(input_feature_dim, config["hidden_dim"] // 2,
                          go_forward=(i % 2 == 0),
                          recurrent_dropout_probability=config["dropout"],
                          use_input_projection_bias=False,
                          bidirectional=True,
                          batch_first=True,
                          ) for i in range(2 * config["n_layers"])])
    else:
        raise ValueError("Invalid embedding layer {}".format(config["embed_layer"]))

    # 3. Node model
    if config["node_model"] == "simple":
        self.node_W = nn.Linear(config["hidden_dim"], config["hidden_dim"])
        nn.init.xavier_normal_(self.node_W.weight)
        self.node_U = nn.Linear(config["hidden_dim"], config["output_node_dim"])
        nn.init.xavier_normal_(self.node_U.weight)
    elif config["node_model"] == "none":
        assert _check_logistic(config)
        self.node_U = nn.Linear(input_feature_dim, config["output_node_dim"])
        nn.init.xavier_normal_(self.node_U.weight)
    else:
        raise ValueError("Invalid node model {}".format(config["node_model"]))

    # 4. Decode layer
    if config["decode_layer"] == "simple":
        pass
    elif config["decode_layer"] == "crf":
        self.crf = CRF(config["output_node_dim"])
        nn.init.orthogonal_(self.crf.transitions)
    else:
        raise ValueError("Invalid decode layer {}".format(config["decode_layer"]))

    # 5. Edge features
    self.edge_feature_dim = 1 + 1  # Position embeddings and path length embeddings.
    if config["path_agg"] == "max" or config["path_agg"] == "sum":
        # Include edge embeddings.
        self.edge_feature_dim += config["hidden_dim"]

    # 6. Edge model
    if config["edge_model"] == "simple":
        self.edge_W = nn.Linear(3 * (config["hidden_dim"] + config["output_node_dim"]) + self.edge_feature_dim,
                                config["hidden_dim"])
        nn.init.xavier_normal_(self.edge_W.weight)
        self.edge_U = nn.Linear(config["hidden_dim"], config["output_arc_dim"])
        nn.init.xavier_normal_(self.edge_U.weight)
    elif config["edge_model"] == "none":
        assert _check_logistic(config)
        self.edge_U = nn.Linear(3 * (input_feature_dim + config["output_node_dim"]) + self.edge_feature_dim,
                                config["output_arc_dim"])
        nn.init.xavier_normal_(self.edge_U.weight)
    else:
        raise ValueError("Invalid edge model {}".format(config["edge_model"]))

    # 7. Objectives
    self.node_objective = nn.CrossEntropyLoss(torch.FloatTensor(config["node_weights"]))
    self.edge_objective = nn.CrossEntropyLoss(torch.FloatTensor(config["edge_weights"]))

    self.config = config

def allennlp_seq2seq(c, num_layers, input, hidden, cell, batch, timestep, repeat, cuda, output):
    num_layers = int(num_layers)
    input = int(input)
    hidden = int(hidden)
    cell = int(cell)
    batch = int(batch)
    timestep = int(timestep)
    repeat = int(repeat)

    lstms = []
    lstm_input = input
    for _ in range(num_layers):
        lstms.append(
            PytorchSeq2SeqWrapper(AugmentedLstm(
                input_size=lstm_input,
                hidden_size=hidden,
                use_highway=False,
                use_input_projection_bias=False,
            ), stateful=True))
        lstm_input = hidden

    input_tensor = torch.rand(batch, timestep, input)
    if cuda == 'cuda':
        input_tensor = input_tensor.cuda()
        lstms = [l.cuda() for l in lstms]

    durations = []
    for idx in range(repeat):
        batch_lengths = [timestep]
        batch_lengths.extend([random.randrange(timestep + 1) for _ in range(batch - 1)])
        batch_lengths = sorted(batch_lengths, reverse=True)

        mask = torch.zeros(batch, timestep, dtype=torch.long)
        for mask_idx, length in enumerate(batch_lengths):
            mask[mask_idx, :length] = 1
        if cuda == 'cuda':
            mask = mask.cuda()

        with torch.no_grad():
            time_start = time.time()
            lstm_input = input_tensor
            for lstm in lstms:
                lstm_input = lstm(lstm_input, mask)
            durations.append((idx, time.time() - time_start))

    with open(output, 'w') as fout:
        json.dump(
            {
                'type': 'allennlp_seq2seq',
                'cuda': cuda,
                'durations': durations
            },
            fout,
            ensure_ascii=False,
            indent=2,
        )

def test_augmented_lstm_throws_error_on_non_packed_sequence_input(self):
    lstm = AugmentedLstm(3, 5)
    tensor = torch.rand([5, 7, 9])
    with pytest.raises(ConfigurationError):
        lstm(tensor)

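# For contrast with the ConfigurationError above, a minimal sketch of the packed
# input AugmentedLstm does accept: a PackedSequence built with batch_first=True
# over sequences sorted by decreasing length (shapes here are illustrative, and
# the AugmentedLstm import path is the assumed AllenNLP location).
import torch
from torch.nn.utils.rnn import pack_padded_sequence
from allennlp.modules.augmented_lstm import AugmentedLstm

lstm = AugmentedLstm(3, 5)
tensor = torch.rand([5, 7, 3])
lengths = [7, 6, 5, 4, 3]
packed = pack_padded_sequence(tensor, lengths, batch_first=True)
output, _ = lstm(packed)  # output is also a PackedSequence
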
def __init__(
    self,
    input_size: int,
    hidden_size: int,
    num_layers: int,
    recurrent_dropout_probability: float = 0.0,
    use_highway: bool = True,
    use_input_projection_bias: bool = True,
) -> None:
    super().__init__()

    # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
    # pdb.set_trace()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    model_dir = "model"
    model_name = "openie"
    device_name = os.getenv("TARGET_DEVICE", default="").upper()
    assert device_name in ("U50LV", "U25"), "TARGET_DEVICE should be U50LV/U25"
    num_sequences = -1
    device_core_id = 0

    self.thread_lstm = []
    self.batch = []
    self.total_batch = 0
    if device_name == "U50LV":
        core = 2
        models = [
            os.path.join("data", file)
            for file in ["compiled_batch_3.xmodel", "compiled_batch_4.xmodel"]
        ]
    if device_name == "U25":
        core = 1
        models = [os.path.join("data", file) for file in ["compiled_batch_1.xmodel"]]

    for i in range(core):
        xmodel = models[i]
        xgraph = xir.Graph.deserialize(xmodel)
        self.thread_lstm.append(
            vart.Runner.create_runner(xgraph.get_root_subgraph(), "run"))
        inputTensors = self.thread_lstm[i].get_input_tensors()
        outputTensors = self.thread_lstm[i].get_output_tensors()
        batch_size, _, runner_in_seq_len = tuple(inputTensors[0].dims)
        _, _, runner_out_seq_len = tuple(outputTensors[0].dims)
        self.batch.append(batch_size)
        self.total_batch += self.batch[i]

    layers = []
    lstm_input_size = input_size
    for layer_index in range(num_layers):
        go_forward = layer_index % 2 == 0
        layer = AugmentedLstm(
            lstm_input_size,
            hidden_size,
            go_forward,
            recurrent_dropout_probability=recurrent_dropout_probability,
            use_highway=use_highway,
            use_input_projection_bias=use_input_projection_bias,
        )
        lstm_input_size = hidden_size
        self.add_module("layer_{}".format(layer_index), layer)
        layers.append(layer)
    self.lstm_layers = layers

    out_pos = xgraph.get_root_subgraph().get_attr('output_fix2float')
    in_pos = xgraph.get_root_subgraph().get_attr('input_float2fix')
    self.input_scale = 2.0 ** in_pos
    self.output_scale = 2.0 ** out_pos
