def __init__(self, filter_height, filter_width, channels, num_filters, stride,
             exp_global=Ref(Path("exp_global"))):
    """
    Args:
      filter_height: filter heights for the three convolutional layers
      filter_width: filter widths for the three convolutional layers
      channels: number of input channels for each layer
      num_filters: number of filters (output channels) for each layer
      stride: convolution stride, e.g. (2, 2)
    """
    model = exp_global.dynet_param_collection.param_col
    self.filter_height = filter_height
    self.filter_width = filter_width
    self.channels = channels
    self.num_filters = num_filters
    self.stride = stride  # (2, 2)
    self.hidden_states = {}

    normalInit = dy.NormalInitializer(0, 0.1)
    self.filters1 = model.add_parameters(dim=(self.filter_height[0], self.filter_width[0],
                                              self.channels[0], self.num_filters[0]),
                                         init=normalInit)
    self.filters2 = model.add_parameters(dim=(self.filter_height[1], self.filter_width[1],
                                              self.channels[1], self.num_filters[1]),
                                         init=normalInit)
    self.filters3 = model.add_parameters(dim=(self.filter_height[2], self.filter_width[2],
                                              self.channels[2], self.num_filters[2]),
                                         init=normalInit)
def __init__(self, filter_height, filter_width, channels, num_filters, stride):
    """
    :param filter_height: filter heights for the three convolutional layers
    :param filter_width: filter widths for the three convolutional layers
    :param channels: number of input channels for each layer
    :param num_filters: number of filters (output channels) for each layer
    :param stride: convolution stride, e.g. (2, 2)
    """
    model = model_globals.dynet_param_collection.param_col
    self.filter_height = filter_height
    self.filter_width = filter_width
    self.channels = channels
    self.num_filters = num_filters
    self.stride = stride  # (2, 2)

    normalInit = dy.NormalInitializer(0, 0.1)
    self.filters1 = model.add_parameters(dim=(self.filter_height[0], self.filter_width[0],
                                              self.channels[0], self.num_filters[0]),
                                         init=normalInit)
    self.filters2 = model.add_parameters(dim=(self.filter_height[1], self.filter_width[1],
                                              self.channels[1], self.num_filters[1]),
                                         init=normalInit)
    self.filters3 = model.add_parameters(dim=(self.filter_height[2], self.filter_width[2],
                                              self.channels[2], self.num_filters[2]),
                                         init=normalInit)
def __init__(self, filter_height, filter_width, channels, num_filters, stride):
    """
    Args:
      filter_height: filter heights for the three convolutional layers
      filter_width: filter widths for the three convolutional layers
      channels: number of input channels for each layer
      num_filters: number of filters (output channels) for each layer
      stride: convolution stride, e.g. (2, 2)
    """
    model = ParamManager.my_params(self)
    self.filter_height = filter_height
    self.filter_width = filter_width
    self.channels = channels
    self.num_filters = num_filters
    self.stride = stride  # (2, 2)
    self.hidden_states = {}

    normalInit = dy.NormalInitializer(0, 0.1)
    self.filters1 = model.add_parameters(dim=(self.filter_height[0], self.filter_width[0],
                                              self.channels[0], self.num_filters[0]),
                                         init=normalInit)
    self.filters2 = model.add_parameters(dim=(self.filter_height[1], self.filter_width[1],
                                              self.channels[1], self.num_filters[1]),
                                         init=normalInit)
    self.filters3 = model.add_parameters(dim=(self.filter_height[2], self.filter_width[2],
                                              self.channels[2], self.num_filters[2]),
                                         init=normalInit)
def __init__(self, du, nu, de, pc, pre_user=None):
    super(LookupUserRecognizer, self).__init__(pc)
    self.du, self.nu = du, nu
    if pre_user is None:
        init = dy.NormalInitializer(1 / self.du, np.sqrt(1 / self.du))
        self.U_p = self.pc.add_lookup_parameters((self.nu, self.du), init=init, name='U')
    else:
        self.U_p = self.pc.lookup_parameters_from_numpy(pre_user, name='U')
def __init__(self, v, du, nu, de, pc, pretrained_BU=None):
    super(LogFactVocabUserRecognizer, self).__init__(pc)
    self.du = du
    self.v, self.nu = v, nu
    # User vectors
    self.U_p = self.pc.add_lookup_parameters((nu, 1, du), init=dy.ConstInitializer(0), name='U')
    init = dy.NormalInitializer(1 / self.du, np.sqrt(1 / self.du))
    # Biases
    self.B_p = self.pc.add_parameters((v, du), init=init, name='B')
    self.avg = None
    self.BU_p = None
def __init__(self, input_dim, window_receptor, output_dim, num_layers, internal_dim,
             non_linearity='linear', exp_global=Ref(Path("exp_global"))):
    """
    Args:
      num_layers: number of layers after the first receptor convolution
      input_dim: size of the inputs
      window_receptor: window size of the receptor
      output_dim: size of the outputs
      internal_dim: size of the hidden (internal) dimension
      non_linearity: non-linearity to apply between layers
    """
    model = exp_global.dynet_param_collection.param_col
    self.input_dim = input_dim
    self.window_receptor = window_receptor
    self.internal_dim = internal_dim
    self.non_linearity = non_linearity
    self.output_dim = output_dim
    if self.non_linearity == 'linear':
        self.gain = 1.0
    elif self.non_linearity == 'tanh':
        self.gain = 1.0
    elif self.non_linearity == 'relu':
        self.gain = 0.5
    elif self.non_linearity == 'sigmoid':
        self.gain = 4.0

    normalInit = dy.NormalInitializer(0, 0.1)
    self.pConv1 = model.add_parameters(dim=(self.input_dim, self.window_receptor, 1, self.internal_dim),
                                       init=normalInit)
    self.pBias1 = model.add_parameters(dim=(self.internal_dim))
    self.builder_layers = []
    for _ in range(num_layers):
        conv = model.add_parameters(dim=(self.internal_dim, 1, 1, self.internal_dim), init=normalInit)
        bias = model.add_parameters(dim=(self.internal_dim))
        self.builder_layers.append((conv, bias))
    self.last_conv = model.add_parameters(dim=(self.internal_dim, 1, 1, self.output_dim), init=normalInit)
    self.last_bias = model.add_parameters(dim=(self.output_dim))
def __init__(self,
             input_dim: numbers.Integral,
             window_receptor: numbers.Integral,
             output_dim: numbers.Integral,
             num_layers: numbers.Integral,
             internal_dim: numbers.Integral,
             non_linearity: str = 'linear') -> None:
    """
    Args:
      num_layers: number of layers after the first receptor convolution
      input_dim: size of the inputs
      window_receptor: window size of the receptor
      output_dim: size of the outputs
      internal_dim: size of the hidden (internal) dimension
      non_linearity: non-linearity to apply between layers
    """
    model = param_collections.ParamManager.my_params(self)
    self.input_dim = input_dim
    self.window_receptor = window_receptor
    self.internal_dim = internal_dim
    self.non_linearity = non_linearity
    self.output_dim = output_dim
    if self.non_linearity == 'linear':
        self.gain = 1.0
    elif self.non_linearity == 'tanh':
        self.gain = 1.0
    elif self.non_linearity == 'relu':
        self.gain = 0.5
    elif self.non_linearity == 'sigmoid':
        self.gain = 4.0

    normalInit = dy.NormalInitializer(0, 0.1)
    self.pConv1 = model.add_parameters(dim=(self.input_dim, self.window_receptor, 1, self.internal_dim),
                                       init=normalInit)
    self.pBias1 = model.add_parameters(dim=(self.internal_dim, ))
    self.builder_layers = []
    for _ in range(num_layers):
        conv = model.add_parameters(dim=(self.internal_dim, 1, 1, self.internal_dim), init=normalInit)
        bias = model.add_parameters(dim=(self.internal_dim, ))
        self.builder_layers.append((conv, bias))
    self.last_conv = model.add_parameters(dim=(self.internal_dim, 1, 1, self.output_dim), init=normalInit)
    self.last_bias = model.add_parameters(dim=(self.output_dim, ))
def __init__(self, in_height, out_height):
    """
    Args:
      in_height: input dimension of the affine transform
      out_height: output dimension of the affine transform
    """
    model = ParamManager.my_params(self)
    self.in_height = in_height
    self.out_height = out_height

    normalInit = dy.NormalInitializer(0, 0.1)
    self.pW = model.add_parameters(dim=(self.out_height, self.in_height), init=normalInit)
    self.pb = model.add_parameters(dim=self.out_height)
def __init__(self, in_height, out_height, nonlinearity='linear'):
    """
    Args:
      in_height: input dimension of the affine transform
      out_height: output dimension of the affine transform
      nonlinearity: nonlinear activation function
    """
    model = ParamManager.my_params(self)
    self.in_height = in_height
    self.out_height = out_height
    self.nonlinearity = nonlinearity

    normalInit = dy.NormalInitializer(0, 0.1)
    self.pW = model.add_parameters(dim=(self.out_height, self.in_height), init=normalInit)
    self.pb = model.add_parameters(dim=self.out_height)
def __init__(self, num_layers, input_dim, hidden_dim, model, rnn_builder_factory,
             chn_dim, num_filters, filter_size_time, filter_size_freq, stride):
    """
    :param num_layers: depth of the RNN
    :param input_dim: size of the inputs
    :param hidden_dim: size of the outputs (and intermediate RNN layer representations)
    :param model
    :param rnn_builder_factory: RNNBuilder subclass, e.g. LSTMBuilder
    """
    assert num_layers > 0
    assert hidden_dim % 2 == 0
    assert input_dim % chn_dim == 0

    self.chn_dim = chn_dim
    self.freq_dim = input_dim // chn_dim
    self.num_filters = num_filters            # 32
    self.filter_size_time = filter_size_time  # 3
    self.filter_size_freq = filter_size_freq  # 3
    self.stride = stride                      # (2, 2)

    normalInit = dy.NormalInitializer(0, 0.1)
    self.filters1 = model.add_parameters(dim=(self.filter_size_time, self.filter_size_freq,
                                              self.chn_dim, self.num_filters),
                                         init=normalInit)
    self.filters2 = model.add_parameters(dim=(self.filter_size_time, self.filter_size_freq,
                                              self.num_filters, self.num_filters),
                                         init=normalInit)

    conv_dim_l1 = math.ceil(float(self.freq_dim - self.filter_size_freq + 1) / float(self.stride[1]))
    conv_dim_l2 = int(math.ceil(float(conv_dim_l1 - self.filter_size_freq + 1) / float(self.stride[1])))
    conv_dim_out = conv_dim_l2 * self.num_filters

    self.builder_layers = []
    f = rnn_builder_factory(1, conv_dim_out, hidden_dim // 2, model)
    b = rnn_builder_factory(1, conv_dim_out, hidden_dim // 2, model)
    self.builder_layers.append((f, b))
    for _ in range(num_layers - 1):
        f = rnn_builder_factory(1, hidden_dim, hidden_dim // 2, model)
        b = rnn_builder_factory(1, hidden_dim, hidden_dim // 2, model)
        self.builder_layers.append((f, b))
def __init__(self, v, du, nu, de, pc, pretrained_BU=None):
    super(FactVocabUserRecognizer, self).__init__(pc)
    # prediction parameters
    self.Wh_p = self.pc.add_parameters((de, de), name='Wh')
    self.bh_p = self.pc.add_parameters((de,), name='bh', init=dy.ConstInitializer(0))
    self.Su_p = self.pc.add_parameters((du, de), name='Su')
    self.bu_p = self.pc.add_parameters((du,), name='bu', init=dy.ConstInitializer(0))
    self.du = du
    self.v, self.nu = v, nu
    # User vectors
    self.U_p = self.pc.add_lookup_parameters((nu, du), init=dy.ConstInitializer(0), name='U')
    init = dy.NormalInitializer(1 / self.du, np.sqrt(1 / self.du))
    # Biases
    self.B_p = self.pc.add_parameters((v, du), init=init, name='B')
    self.avg = None
    self.BU_p = None
def __init__(self, in_height, out_height):
    """
    :param in_height: input dimension of the affine transform
    :param out_height: output dimension of the affine transform
    """
    model = model_globals.dynet_param_collection.param_col
    self.in_height = in_height
    self.out_height = out_height

    normalInit = dy.NormalInitializer(0, 0.1)
    self.pW = model.add_parameters(dim=(self.out_height, self.in_height), init=normalInit)
    self.pb = model.add_parameters(dim=self.out_height)
def __init__(self, in_height, out_height, nonlinearity='linear', exp_global=Ref(Path("exp_global"))):
    """
    :param in_height, out_height: input and output dimension of the affine transform
    :param nonlinearity: nonlinear activation function
    """
    model = exp_global.dynet_param_collection.param_col
    self.in_height = in_height
    self.out_height = out_height
    self.nonlinearity = nonlinearity

    normalInit = dy.NormalInitializer(0, 0.1)
    self.pW = model.add_parameters(dim=(self.out_height, self.in_height), init=normalInit)
    self.pb = model.add_parameters(dim=self.out_height)
def __init__(self, in_height, out_height, exp_global=Ref(Path("exp_global"))):
    """
    Args:
      in_height: input dimension of the affine transform
      out_height: output dimension of the affine transform
    """
    model = exp_global.dynet_param_collection.param_col
    self.in_height = in_height
    self.out_height = out_height

    normalInit = dy.NormalInitializer(0, 0.1)
    self.pW = model.add_parameters(dim=(self.out_height, self.in_height), init=normalInit)
    self.pb = model.add_parameters(dim=self.out_height)
def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.001):
    self._model = dy.ParameterCollection()
    self._input_dim = input_dim
    self._hidden_dim = hidden_dim
    self._output_dim = output_dim
    self._rnn = dy.SimpleRNNBuilder(self.LAYERS, self._input_dim, self._hidden_dim, self._model)
    # self._rnn.disable_dropout()
    self._W = self._model.add_parameters((self._output_dim, self._hidden_dim),
                                         init=dy.NormalInitializer())
    self._learning_rate = learning_rate
    self._trainer = dy.MomentumSGDTrainer(self._model, learning_rate=self._learning_rate)
    self._l2_param = 0.0006
    # self._l2_param = 0.0
    self._init_layers()
def __init__(self,
             head_count: int,
             model_dim: int,
             downsample_factor: int = 1,
             input_dim: int = None,
             ignore_masks: bool = False,
             plot_attention: typing.Optional[str] = None,
             diag_gauss_mask: typing.Union[bool, numbers.Real] = False,
             square_mask_std: bool = True,
             cross_pos_encoding_type: typing.Optional[str] = None,
             kq_pos_encoding_type: typing.Optional[str] = None,
             kq_pos_encoding_size: int = 40,
             max_len: int = 1500,
             param_init: xnmt.param_initializers.ParamInitializer = xnmt.param_initializers.GlorotInitializer(),
             bias_init: xnmt.param_initializers.ParamInitializer = xnmt.param_initializers.ZeroInitializer(),
             linear_kvq=None,
             kq_positional_embedder=None,
             layer_norm=None,
             res_shortcut=None,
             desc: typing.Any = None) -> None:
    if input_dim is None:
        input_dim = model_dim
    self.input_dim = input_dim
    assert model_dim % head_count == 0
    self.dim_per_head = model_dim // head_count
    self.model_dim = model_dim
    self.head_count = head_count
    assert downsample_factor >= 1
    self.downsample_factor = downsample_factor
    self.plot_attention = plot_attention
    self.plot_attention_counter = 0
    self.desc = desc

    self.ignore_masks = ignore_masks
    self.diag_gauss_mask = diag_gauss_mask
    self.square_mask_std = square_mask_std

    self.kq_pos_encoding_type = kq_pos_encoding_type
    self.kq_pos_encoding_size = kq_pos_encoding_size
    self.max_len = max_len

    subcol = param_collections.ParamManager.my_params(self)

    if self.kq_pos_encoding_type is None:
        self.linear_kvq = self.add_serializable_component(
            "linear_kvq", linear_kvq,
            lambda: transforms.Linear(input_dim * downsample_factor,
                                      head_count * self.dim_per_head * 3,
                                      param_init=param_init,
                                      bias_init=bias_init))
    else:
        self.linear_kq, self.linear_v = \
            self.add_serializable_component(
                "linear_kvq", linear_kvq,
                lambda: [
                    transforms.Linear(input_dim * downsample_factor + self.kq_pos_encoding_size,
                                      head_count * self.dim_per_head * 2,
                                      param_init=param_init,
                                      bias_init=bias_init),
                    transforms.Linear(input_dim * downsample_factor,
                                      head_count * self.dim_per_head,
                                      param_init=param_init,
                                      bias_init=bias_init)])
        assert self.kq_pos_encoding_type == "embedding"
        self.kq_positional_embedder = self.add_serializable_component(
            "kq_positional_embedder", kq_positional_embedder,
            lambda: embedders.PositionEmbedder(max_pos=self.max_len,
                                               emb_dim=self.kq_pos_encoding_size,
                                               param_init=param_init))

    if self.diag_gauss_mask:
        if self.diag_gauss_mask == "rand":
            rand_init = np.exp((np.random.random(size=(self.head_count,))) * math.log(1000))
            self.diag_gauss_mask_sigma = subcol.add_parameters(
                dim=(1, 1, self.head_count),
                init=dy.NumpyInitializer(rand_init))
        else:
            self.diag_gauss_mask_sigma = subcol.add_parameters(
                dim=(1, 1, self.head_count),
                init=dy.ConstInitializer(self.diag_gauss_mask))

    self.layer_norm = self.add_serializable_component("layer_norm", layer_norm,
                                                      lambda: norms.LayerNorm(model_dim))

    if model_dim != input_dim * downsample_factor:
        self.res_shortcut = self.add_serializable_component(
            "res_shortcut", res_shortcut,
            lambda: transforms.Linear(input_dim * downsample_factor,
                                      model_dim,
                                      param_init=param_init,
                                      bias_init=bias_init))

    self.cross_pos_encoding_type = cross_pos_encoding_type
    if cross_pos_encoding_type == "embedding":
        self.cross_pos_emb_p1 = subcol.add_parameters(
            dim=(self.max_len, self.dim_per_head, self.head_count),
            init=dy.NormalInitializer(mean=1.0, var=0.001))
        self.cross_pos_emb_p2 = subcol.add_parameters(
            dim=(self.max_len, self.dim_per_head, self.head_count),
            init=dy.NormalInitializer(mean=1.0, var=0.001))
    elif cross_pos_encoding_type is not None:
        raise NotImplementedError()
from tupa.config import Config
from tupa.features.feature_params import MISSING_VALUE

TRAINERS = {
    "sgd": (dy.SimpleSGDTrainer, "e0"),
    "cyclic": (dy.CyclicalSGDTrainer, "e0_min"),
    "momentum": (dy.MomentumSGDTrainer, "e0"),
    "adagrad": (dy.AdagradTrainer, "e0"),
    "adadelta": (dy.AdadeltaTrainer, None),
    "rmsprop": (dy.RMSPropTrainer, "e0"),
    "adam": (partial(dy.AdamTrainer, beta_2=0.9), "alpha"),
}

INITIALIZERS = {
    "glorot_uniform": dy.GlorotInitializer(),
    "normal": dy.NormalInitializer(),
    "uniform": dy.UniformInitializer(1),
    "const": dy.ConstInitializer(0),
}

ACTIVATIONS = {
    "square": dy.square,
    "cube": dy.cube,
    "tanh": dy.tanh,
    "sigmoid": dy.logistic,
    "relu": dy.rectify,
}


class NeuralNetwork(Classifier):
    """
def __init__(self, vocab, pos, xpos, rels, w2i, c2i, ext_words_train, ext_words_devtest, options):
    self.model = dy.ParameterCollection()
    self.pretrained_embs = dy.ParameterCollection()
    self.learning_rate = options.learning_rate
    self.trainer = dy.AdamTrainer(self.model, alpha=self.learning_rate,
                                  beta_1=0.9, beta_2=0.9, eps=1e-12)

    self.dropout = float(options.dropout)
    self.ldims = options.lstm_dims
    self.wdims = options.wembedding_dims
    self.cdims = options.cembedding_dims
    self.posdims = options.posembedding_dims
    self.pred_batch_size = options.pred_batch_size

    self.ext_words_train = {word: ind + 2 for word, ind in ext_words_train.items()}
    self.ext_words_devtest = {word: ind + 2 for word, ind in ext_words_devtest.items()}

    self.wordsCount = vocab
    self.vocab = {word: ind + 2 for word, ind in w2i.items()}
    self.pos = {word: ind + 2 for ind, word in enumerate(pos)}
    self.id2pos = {ind: word for word, ind in self.pos.items()}
    self.xpos = {word: ind + 2 for ind, word in enumerate(xpos)}
    self.id2xpos = {ind: word for word, ind in self.xpos.items()}
    self.c2i = c2i
    self.rels = {word: ind for ind, word in enumerate(rels)}
    self.irels = {ind: word for word, ind in self.rels.items()}
    self.vocab['PAD'] = 1
    self.pos['PAD'] = 1
    self.xpos['PAD'] = 1

    self.external_embedding, self.edim, self.edim_out = None, 0, 0
    if options.external_embedding is not None:
        self.external_embedding = np.load(options.external_embedding)
        self.ext_voc = pickle.load(open(options.external_embedding_voc, "rb"))
        self.edim = self.external_embedding.shape[1]
        self.projected_embs = Lin_Projection(self.model, self.edim, self.wdims)

        self.elookup_train = self.pretrained_embs.add_lookup_parameters(
            (len(self.ext_words_train) + 2, self.edim))
        for word, i in self.ext_words_train.items():
            self.elookup_train.init_row(i, self.external_embedding[self.ext_voc[word], :])
        self.elookup_train.init_row(0, np.zeros(self.edim))
        self.elookup_train.init_row(1, np.zeros(self.edim))

        self.elookup_devtest = self.pretrained_embs.add_lookup_parameters(
            (len(self.ext_words_devtest) + 2, self.edim))
        for word, i in self.ext_words_devtest.items():
            self.elookup_devtest.init_row(i, self.external_embedding[self.ext_voc[word], :])
        self.elookup_devtest.init_row(0, np.zeros(self.edim))
        self.elookup_devtest.init_row(1, np.zeros(self.edim))

        self.ext_words_train['PAD'] = 1
        self.ext_words_devtest['PAD'] = 1
        print('Load external embeddings. External embeddings vectors dimension', self.edim)

    # LSTMs
    self.fwdLSTM1 = LSTM(self.model, self.wdims + self.posdims, self.ldims, forget_bias=0.0)
    self.bwdLSTM1 = LSTM(self.model, self.wdims + self.posdims, self.ldims, forget_bias=0.0)
    self.fwdLSTM2 = LSTM(self.model, self.ldims, self.ldims, forget_bias=0.0)
    self.bwdLSTM2 = LSTM(self.model, self.ldims, self.ldims, forget_bias=0.0)
    self.fwdLSTM3 = LSTM(self.model, self.ldims, self.ldims, forget_bias=0.0)
    self.bwdLSTM3 = LSTM(self.model, self.ldims, self.ldims, forget_bias=0.0)

    self.biaffineParser = DeepBiaffineAttentionDecoder(
        self.model,
        len(self.rels),
        src_ctx_dim=self.ldims * 2,
        n_arc_mlp_units=400,
        n_label_mlp_units=100,
        arc_mlp_dropout=self.dropout,
        label_mlp_dropout=self.dropout)

    self.HybridCharembs = HybridCharacterAttention(self.model,
                                                   ldims=400,
                                                   input_size=self.cdims,
                                                   output_size=self.wdims,
                                                   dropout=self.dropout)

    self.wlookup = self.model.add_lookup_parameters((len(vocab) + 2, self.wdims),
                                                    init=dy.ConstInitializer(0))    # 0 for unknown, 1 for [PAD]
    self.poslookup = self.model.add_lookup_parameters((len(self.pos) + 2, self.posdims),
                                                      init=dy.ConstInitializer(0))  # 0 for unknown, 1 for [PAD]
    self.xposlookup = self.model.add_lookup_parameters((len(self.xpos) + 2, self.posdims),
                                                       init=dy.ConstInitializer(0)) # 0 for unknown, 1 for [PAD]

    self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims),
                                                    init=dy.NormalInitializer())
    self.ROOT = self.model.add_parameters((self.wdims * 2), init=dy.ConstInitializer(0))
def __init__(self, pc, d_i, d_h, d_o):
    self.i2h = pc.add_parameters((d_h, d_i), init=dy.NormalInitializer())
    self.bh = pc.add_parameters((d_h, ), init=dy.NormalInitializer())
    self.h2o = pc.add_parameters((d_o, d_h), init=dy.NormalInitializer())
    self.bo = pc.add_parameters((d_o, ), init=dy.NormalInitializer())
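A possible usage sketch (not part of the original snippet above): a forward pass through the two-layer perceptron whose weights and biases are drawn from dy.NormalInitializer() in the constructor above. The object name mlp and the helper mlp_forward are hypothetical; the sketch only assumes the standard DyNet expression API.

import dynet as dy

def mlp_forward(mlp, x_values):
    # Fresh computation graph for this input
    dy.renew_cg()
    x = dy.inputVector(x_values)
    # Hidden layer: tanh(W_ih * x + b_h), using the normally-initialized parameters
    h = dy.tanh(dy.affine_transform([dy.parameter(mlp.bh), dy.parameter(mlp.i2h), x]))
    # Output layer: W_ho * h + b_o (raw scores, before any softmax)
    return dy.affine_transform([dy.parameter(mlp.bo), dy.parameter(mlp.h2o), h])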
def initializer(self, dim, is_lookup=False, num_shared=1):
    return dy.NormalInitializer(mean=self.mean, var=self.var)
def main():
    dy.renew_cg()
    try:
        train_file = open("%s" % (sys.argv[1]))
        test_file = open("%s" % (sys.argv[2]))
    except:
        print("python classification_dynet.py <train_file> <test_file>")
        sys.exit(1)

    train_text_set, train_content_label_set, train_type_label_set, unique_content, unique_type = \
        extract_from_json(train_file)
    test_text_set, test_content_label_set, test_type_label_set, _, _ = extract_from_json(test_file)

    word_dict = {}
    word_dict = extract_dictionary(train_text_set, word_dict)
    word_dict = extract_dictionary(test_text_set, word_dict)
    train_feature_matrix = generate_feature_matrix(train_text_set, word_dict)
    test_feature_matrix = generate_feature_matrix(test_text_set, word_dict)
    features_total = len(train_feature_matrix[0])

    para_collec = dy.ParameterCollection()
    pW1 = para_collec.add_parameters((150, 200), dy.NormalInitializer())
    pBias1 = para_collec.add_parameters((150), dy.ConstInitializer(0))
    pW2_content = para_collec.add_parameters((100, 150), dy.NormalInitializer())
    pBias2_content = para_collec.add_parameters((100), dy.ConstInitializer(0))
    pW3_content = para_collec.add_parameters((len(unique_content), 100), dy.NormalInitializer())
    pBias3_content = para_collec.add_parameters((len(unique_content)), dy.ConstInitializer(0))
    pW2_type = para_collec.add_parameters((50, 150), dy.NormalInitializer())
    pBias2_type = para_collec.add_parameters((50), dy.ConstInitializer(0))
    pW3_type = para_collec.add_parameters((len(unique_type), 50), dy.NormalInitializer())
    pBias3_type = para_collec.add_parameters((len(unique_type)), dy.ConstInitializer(0))
    lookup = para_collec.add_lookup_parameters((features_total, 200), dy.NormalInitializer())
    trainer = dy.SimpleSGDTrainer(para_collec)

    for i in range(0, 1):
        # resample minority and majority classes
        majority, majority_content_label, majority_type_label, minority, minority_content_label, minority_type_label = \
            label_separator("type", train_feature_matrix, train_content_label_set, train_type_label_set)
        minority_u_text, minority_u_content_label, minority_u_type_label = resample(
            minority, minority_content_label, minority_type_label,
            replace=True, n_samples=int(len(majority) * 3), random_state=123)
        X_train = train_feature_matrix
        y_train_content = train_content_label_set
        y_train_type = train_type_label_set

        for index in range(0, 500):
            w1 = dy.parameter(pW1)
            bias1 = dy.parameter(pBias1)
            w2_content = dy.parameter(pW2_content)
            bias2_content = dy.parameter(pBias2_content)
            w3_content = dy.parameter(pW3_content)
            bias3_content = dy.parameter(pBias3_content)
            w2_type = dy.parameter(pW2_type)
            bias2_type = dy.parameter(pBias2_type)
            w3_type = dy.parameter(pW3_type)
            bias3_type = dy.parameter(pBias3_type)

            input_text = []
            input_array = X_train[index]
            for i in range(0, X_train[index].size):
                if X_train[index][i] > 0:
                    input_text.append(lookup[X_train[index][i]])
            x = dy.concatenate(input_text, 1)
            e_in = dy.sum_dim(x, [1]) / features_total

            e_affin1 = dy.affine_transform([bias1, w1, e_in])
            e_affin1 = dy.rectify(e_affin1)

            e_content_affin2 = dy.affine_transform([bias2_content, w2_content, e_affin1])
            e_content_affin2 = dy.dropout(e_content_affin2, 0.5)
            e_content_affin2 = dy.rectify(e_content_affin2)
            e_content_affin3 = dy.affine_transform([bias3_content, w3_content, e_content_affin2])
            e_content_affin3 = dy.dropout(e_content_affin3, 0.5)
            e_content_affin3 = dy.rectify(e_content_affin3)

            e_type_affin2 = dy.affine_transform([bias2_type, w2_type, e_affin1])
            e_type_affin2 = dy.dropout(e_type_affin2, 0.5)
            e_type_affin2 = dy.rectify(e_type_affin2)
            e_type_affin3 = dy.affine_transform([bias3_type, w3_type, e_type_affin2])
            e_type_affin3 = dy.dropout(e_type_affin3, 0.5)
            e_type_affin3 = dy.rectify(e_type_affin3)

            content_output = dy.pickneglogsoftmax(e_content_affin3, y_train_content[index])
            content_loss = content_output.scalar_value()
            type_output = dy.pickneglogsoftmax(e_type_affin3, y_train_type[index])
            type_loss = type_output.scalar_value()
            if index % 100 == 0:
                print(index, ": content_loss: ", content_loss, "type_loss", type_loss)
            content_output.backward()
            trainer.update()
            type_output.backward()
            trainer.update()
            dy.cg_checkpoint()

    print("testing...")
    pred_content = []
    pred_type = []
    w1 = dy.parameter(pW1)
    bias1 = dy.parameter(pBias1)
    w2_content = dy.parameter(pW2_content)
    bias2_content = dy.parameter(pBias2_content)
    w3_content = dy.parameter(pW3_content)
    bias3_content = dy.parameter(pBias3_content)
    w2_type = dy.parameter(pW2_type)
    bias2_type = dy.parameter(pBias2_type)
    w3_type = dy.parameter(pW3_type)
    bias3_type = dy.parameter(pBias3_type)

    for index in range(0, len(test_feature_matrix)):
        input_text = []
        line = train_text_set[index]
        for word in line:
            # check if RT
            if word == "RT":
                input_text.append(lookup[len(word_dict)])
            # check if hashtag
            if word[0] == "#":
                input_text.append(lookup[len(word_dict) + 1])
            # check if mention
            if word[0] == "@":
                input_text.append(lookup[len(word_dict) + 2])
            # just word itself
            if word in word_dict:
                input_text.append(lookup[word_dict[word]])
            try:
                # lower capitalization of the word
                lower_word = str(word).lower()
                input_text.append(lookup[word_dict[lower_word]])
                # no punctuation
                replace_punctuation = str(word).maketrans(string.punctuation, '')
                clean_word = str(word).translate(replace_punctuation)
                input_text.append(lookup[word_dict[clean_word]])
            except:
                continue

        x = dy.concatenate(input_text, 1)
        e_in = dy.sum_dim(x, [1]) / features_total
        e_affin1 = dy.affine_transform([bias1, w1, e_in])
        e_affin1 = dy.rectify(e_affin1)
        e_content_affin2 = dy.affine_transform([bias2_content, w2_content, e_affin1])
        e_content_affin2 = dy.rectify(e_content_affin2)
        e_content_affin3 = dy.affine_transform([bias3_content, w3_content, e_content_affin2])
        e_content_affin3 = dy.rectify(e_content_affin3)
        e_type_affin2 = dy.affine_transform([bias2_type, w2_type, e_affin1])
        e_type_affin2 = dy.rectify(e_type_affin2)
        e_type_affin3 = dy.affine_transform([bias3_type, w3_type, e_type_affin2])
        e_type_affin3 = dy.rectify(e_type_affin3)

        content_output = np.argmax(e_content_affin3.npvalue())
        pred_content.append(content_output)
        type_output = np.argmax(e_type_affin3.npvalue())
        pred_type.append(type_output)

    misclassification_content = 0
    misclassification_type = 0
    for index in range(0, len(pred_content)):
        if pred_content[index] != test_content_label_set[index]:
            misclassification_content += 1
        if pred_type[index] != test_type_label_set[index]:
            misclassification_type += 1
    print("content acc: ", (1 - float(misclassification_content / len(pred_content))))
    print("type acc: ", (1 - float(misclassification_type / len(pred_type))))
trainer = dy.SimpleSGDTrainer(model,                   # Trainer
                              opt.learning_rate,
                              opt.learning_rate_decay)
trainer.set_clip_threshold(-1)                         # Disable gradient clipping

# Create the parameters
params = []  # This holds the parameters for each layer
for i, (di, do) in enumerate(zip(dims, dims[1:])):     # Iterate over the input/output dimensions for each layer
    var = 2 / (di + do) / (1 if (i == num_layers - 1) else gain)  # Variance for the initialization (see Glorot, Bengio (2011))
    W_p = model.add_parameters((do, di), init=dy.NormalInitializer(0, var))  # Sample weights
    b_p = model.add_parameters((do, ), init=dy.ConstInitializer(0))          # Initialize biases at 0
    params.append((W_p, b_p))                          # Add to the list

# Load existing model
if opt.model_in is not None:
    print('Loading from file:', opt.model_in)
    params_list = model.load(opt.model_in)
    params = [(W_p, b_p) for W_p, b_p in zip(params_list[:num_layers], params_list[num_layers:])]


def run_MLP(x):
def initializer(self, dim, is_lookup: bool = False, num_shared: numbers.Integral = 1) -> dy.NormalInitializer:
    return dy.NormalInitializer(mean=self.mean, var=self.var)
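A possible usage sketch (assumption, not from the snippets above): how an object exposing the initializer(dim, is_lookup, num_shared) method shown in the two variants above might be consumed when allocating parameters. NormalParamInit is a hypothetical stand-in for the class that owns the method, holding mean and var attributes.

import dynet as dy

class NormalParamInit(object):
    def __init__(self, mean=0.0, var=0.01):
        self.mean, self.var = mean, var

    def initializer(self, dim, is_lookup=False, num_shared=1):
        # Same pattern as above: wrap the stored moments in a DyNet initializer
        return dy.NormalInitializer(mean=self.mean, var=self.var)

pc = dy.ParameterCollection()
param_init = NormalParamInit(mean=0.0, var=0.01)
dim = (64, 32)
# The returned dy.NormalInitializer is passed straight to add_parameters
W = pc.add_parameters(dim, init=param_init.initializer(dim))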
import dynet

"""
various helper mappings
"""

## DyNet adds init option to choose initializer:
## https://github.com/clab/dynet/blob/master/python/CHANGES.md
INITIALIZER_MAP = {
    'glorot': dynet.GlorotInitializer(),
    'constant': dynet.ConstInitializer(0.01),
    'uniform': dynet.UniformInitializer(0.1),
    'normal': dynet.NormalInitializer(mean=0, var=1)
}

TRAINER_MAP = {
    "sgd": dynet.SimpleSGDTrainer,
    "adam": dynet.AdamTrainer,
    "adadelta": dynet.AdadeltaTrainer,
    "adagrad": dynet.AdagradTrainer,
    "momentum": dynet.MomentumSGDTrainer
}

ACTIVATION_MAP = {"tanh": dynet.tanh, "rectify": dynet.rectify}

BUILDERS = {
    "lstm": dynet.LSTMBuilder,  # is dynet.VanillaLSTMBuilder (cf. https://github.com/clab/dynet/issues/474)
    "lstmc": dynet.CoupledLSTMBuilder,
    "gru": dynet.GRUBuilder,
    "rnn": dynet.SimpleRNNBuilder
}
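A possible usage sketch (assumption): selecting an initializer from the INITIALIZER_MAP defined above by its configuration name and passing it to add_parameters. The option string 'normal' and the parameter shapes are illustrative only.

pc = dynet.ParameterCollection()
init = INITIALIZER_MAP['normal']             # dynet.NormalInitializer(mean=0, var=1)
W = pc.add_parameters((100, 50), init=init)  # weights drawn from N(0, 1)
b = pc.add_parameters((100, ), init=INITIALIZER_MAP['constant'])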
# Model parameters
num_classes = len(set(train_y))   # Number of classes
input_length = train_x.shape[1]   # Dimension of the input
dh = opt.hidden_dim
di = 1

# Create model
model = dy.Model()                                     # DyNet Model
trainer = dy.SimpleSGDTrainer(model,                   # Trainer
                              opt.learning_rate,
                              opt.learning_rate_decay)
trainer.set_clip_threshold(-1)                         # Disable gradient clipping

# Create the parameters
Wx_p = model.add_parameters((dh, di), init=dy.NormalInitializer(0, 0.001))                            # Sample weights
Wh_p = model.add_parameters((dh, dh), init=dy.IdentityInitializer())                                  # Sample weights
bh_p = model.add_parameters((dh,), init=dy.ConstInitializer(0))                                       # Initialize biases at 0
A_p = model.add_parameters((num_classes, dh), init=dy.NormalInitializer(0, 1 / (dh + num_classes)))   # Sample weights
b_p = model.add_parameters((num_classes,), init=dy.ConstInitializer(0))                               # Initialize biases at 0

# Load existing model
if opt.model_in is not None:
    print('Loading from file:', opt.model_in)
    Wx_p, Wh_p, bh_p, A_p, b_p = model.load(opt.model_in)


def run_IRNN(x):
    """
    Runs the IRNN to get the last layer before softmax
    """