def __init__(self, time_features=3, fc_activation="relu", encoder_layers=2, decoder_layers=2, fc_layers=3,
             norm_epsilon=1e-6, transformer_dropout_rate=0.2, pre_out_dim=512, out_dropout=0.3,
             recycle_fc_activ=tf.keras.activations.elu):
    super(TimeDiscriminator, self).__init__()
    self.time_features = time_features
    self.out_dropout = out_dropout
    self.discr = transformer.Transformer(
        embed_dim=time_features, n_heads=1, encoder_layers=encoder_layers, decoder_layers=decoder_layers,
        fc_layers=fc_layers, norm_epsilon=norm_epsilon, dropout_rate=transformer_dropout_rate,
        fc_activation=fc_activation)
    self.recycle_fc = tf.keras.layers.Dense(pre_out_dim, activation=recycle_fc_activ)
    self.last_fc = tf.keras.layers.Dense(1, activation='sigmoid')
def __init__(self, embed_dim=16, n_heads=4, fc_activation="relu", encoder_layers=2, decoder_layers=2, fc_layers=3,
             norm_epsilon=1e-6, transformer_dropout_rate=0.2, pre_out_dim=512, out_dropout=0.3,
             recycle_fc_activ=tf.keras.activations.elu):
    super(ChordsDiscriminator, self).__init__()
    assert embed_dim % n_heads == 0, 'make sure: embed_dim % n_heads == 0'
    self.out_dropout = out_dropout
    self.embed_dim = embed_dim
    self.discr = transformer.Transformer(
        embed_dim=embed_dim, n_heads=n_heads, encoder_layers=encoder_layers, decoder_layers=decoder_layers,
        fc_layers=fc_layers, norm_epsilon=norm_epsilon, dropout_rate=transformer_dropout_rate,
        fc_activation=fc_activation)
    self.recycle_fc = tf.keras.layers.Dense(pre_out_dim, activation=recycle_fc_activ)
    self.last_fc = tf.keras.layers.Dense(1, activation='sigmoid')
def __init__(self, num_emb, emb_dim, hidden_dim, seq_len, batch_size, use_cuda, test_mode=False):
    super(Generator_attention, self).__init__()
    # Constants Initialization
    self.SOS_Index = 0
    self.EOS_Index = 1
    self.PAD_Index = 2
    # Embeddings
    self.emb = nn.Embedding(num_emb, emb_dim)
    self.model = transformer.Transformer(self.emb, self.PAD_Index, self.emb.num_embeddings,
                                         max_seq_len=seq_len)
    self.test_mode = test_mode
    if not test_mode:
        self.data_loader = GenDataIter('inshorts_test/real.data', batch_size)
        self.data_loader.reset()
def __init__(self, num_layers, d_model, num_heads, dff, kernel_size, dropout_rate, timesteps_in, timesteps_out,
             item, directory):
    self.timesteps_in = timesteps_in
    self.timesteps_out = timesteps_out
    # fixed transformer parameters
    input_vocab_size = 1  # there is only 1 feature
    target_vocab_size = 1  # there is only 1 feature
    pe_input = 10 * timesteps_in
    pe_target = 10 * timesteps_out
    # create transformer model
    self.model = transformer.Transformer(num_layers, d_model, num_heads, dff, input_vocab_size,
                                         target_vocab_size, pe_input, pe_target, kernel_size, dropout_rate)
    # training parameters
    self.weights_dir = directory + "TX_" + str(item) + "_k" + str(kernel_size) + \
                       "_dm" + str(d_model) + "_df" + str(dff) + "_l" + str(num_layers) + \
                       "_h" + str(num_heads) + "_weights.h5py"
    self.epochs = 30
    self.threshold = 0.005
    self.batch_size = 64
    self.optimizer = tf.keras.optimizers.Adam(0.0001)
    self.train_loss = tf.keras.metrics.Mean(name='train_loss')
def __init__(self):
    trans = transformer.Transformer()
    self.r2b = trans.transform_matrix_of_frames(
        'head_camera_rgb_optical_frame', 'base_link')
    self.camera = RGBD()
    self.camera.read_point_cloud()
    self.point_cloud = None
def __init__(self, i2w, use_knowledge, args, test=False):
    super(Transformer, self).__init__()
    self.args = args
    self.use_knowledge = use_knowledge

    # Vocab
    self.i2w = i2w
    self.w2i = {w: i for i, w in enumerate(i2w)}

    self.transformer = transformer.Transformer(
        len(i2w), len(i2w), src_pad_idx=self.w2i['_pad'], trg_pad_idx=self.w2i['_pad'])

    # Training
    if test:
        self.criterion = nn.CrossEntropyLoss(ignore_index=self.w2i['_pad'], reduction='sum')
    else:
        self.criterion = nn.CrossEntropyLoss(ignore_index=self.w2i['_pad'])
    self.optim = optim.Adam(lr=args.lr, params=self.parameters(), betas=(0.9, 0.997), eps=1e-09)
def transformer(bsize=None):
    import sys
    sys.path.insert(0, './transformer/')
    import transformer as transf
    from data import DatasetManager

    dm = DatasetManager("wmt14")
    dm.maybe_download_data_files()
    dm.load_vocab()

    transformer = transf.Transformer(
        num_heads=8,
        d_model=512,
        d_ff=2048,
        model_name="transformer",
        tf_sess_config=dict(allow_soft_placement=True)
    )
    train_params = dict(
        learning_rate=1e-4,
        batch_size=bsize,
        seq_len=10,
        max_steps=300000,
    )
    transformer.build_model("wmt14", dm.source_id2word, dm.target_id2word, 0, **train_params)
    loss = transformer._loss
    optimizer = tf.train.AdamOptimizer(learning_rate=0.2).minimize(tf.reduce_sum(loss))
    return optimizer
def __init__(self, vocab_size, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward,
             max_seq_length, pos_dropout, trans_dropout):
    """
    Initializes the model

    Parameters:
        vocab_size (int): The number of tokens in both vocabularies (including start, end, etc. tokens)
        d_model (int): Expected number of features in the encoder/decoder inputs, also used in embeddings
        nhead (int): Number of heads in the transformer
        num_encoder_layers (int): Number of sub-encoder layers in the transformer
        num_decoder_layers (int): Number of sub-decoder layers in the transformer
        dim_feedforward (int): Dimension of the feedforward network in the transformer
        max_seq_length (int): Maximum length of each tokenized sentence
        pos_dropout (float): Dropout value in the positional encoding
        trans_dropout (float): Dropout value in the transformer
    """
    super().__init__()
    self.d_model = d_model
    self.embedding = nn.Embedding(vocab_size, d_model)
    self.pos_enc = PositionalEncoding(d_model, pos_dropout, max_seq_length)
    # self.transformer = nn.Transformer(d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, trans_dropout)
    self.transformer = transformer.Transformer(d_model, nhead, num_encoder_layers, num_decoder_layers,
                                               dim_feedforward, trans_dropout)
    self.fc = nn.Linear(d_model, vocab_size)
def __init__(self, embed_dim=16, n_heads=4, kernel_size=3, fc_activation="relu", encoder_layers=2,
             decoder_layers=2, fc_layers=3, norm_epsilon=1e-6, transformer_dropout_rate=0.2, pre_out_dim=512,
             out_dropout=0.3, recycle_fc_activ=tf.keras.activations.elu):
    super(Discriminator, self).__init__()
    assert embed_dim % n_heads == 0, 'make sure: embed_dim % n_heads == 0'
    self.out_dropout = out_dropout
    self.embed_dim = embed_dim
    self.tm_in_expand = tf.keras.layers.Conv1D(filters=embed_dim, kernel_size=kernel_size, strides=1,
                                               padding='same')
    self.discr = transformer.Transformer(
        embed_dim=embed_dim, n_heads=n_heads, encoder_layers=encoder_layers, decoder_layers=decoder_layers,
        fc_layers=fc_layers, norm_epsilon=norm_epsilon, dropout_rate=transformer_dropout_rate,
        fc_activation=fc_activation)
    self.comb_fc = tf.keras.layers.Dense(1)
    self.recycle_fc = tf.keras.layers.Dense(pre_out_dim, activation=recycle_fc_activ)
    self.last_fc = tf.keras.layers.Dense(1, activation='sigmoid')
def __init__(self, observation_space_size, action_space_size):
    self._hyperparameters = hyperparameters.Hyperparameters()
    self._trajectory_memory = memories.TrajectoryMemory()
    self._transformer = transformer.Transformer(
        self._hyperparameters.device)
    self._policy_network = neural_networks.PolicyNetwork(
        observation_space_size, action_space_size,
        self._hyperparameters.learning_rate, self._hyperparameters.device)
def test_nested_transformer_example_duplicates(self):
    input_list = [{
        'country': 'UK',
        'city': 'London',
        'currency': 'EUR',
        'amount': 11.4
    }, {
        'country': 'UK',
        'city': 'London',
        'currency': 'EUR',
        'amount': 8.9
    }, {
        'country': 'UK',
        'city': 'London',
        'currency': 'GBP',
        'amount': 12.2
    }, {
        'country': 'UK',
        'city': 'London',
        'currency': 'FBP',
        'amount': 10.9
    }]
    nest_keys = ['currency', 'country', 'city']
    res = {
        'EUR': {'UK': {'London': [{'amount': 11.4}, {'amount': 8.9}]}},
        'GBP': {'UK': {'London': [{'amount': 12.2}]}},
        'FBP': {'UK': {'London': [{'amount': 10.9}]}}
    }
    self.assertEqual(
        transformer.Transformer().nested_transformer(input_list, nest_keys), res)
def __init__(self, embed_dim=16, init_knl=3, strt_dim=5, n_heads=4, fc_activation=tf.keras.activations.relu,
             encoder_layers=2, decoder_layers=2, fc_layers=3, norm_epsilon=1e-6, transformer_dropout_rate=0.2,
             noise_std=0.5, max_pos=1000, pos_conv_knl=3):
    super(ChordsSynthesis, self).__init__()
    self.strt_dim = strt_dim
    self.embed_dim = embed_dim
    self.encoder_layers = encoder_layers
    self.decoder_layers = decoder_layers
    self.noise_std = noise_std
    self.init_fc = tf.keras.layers.Dense(embed_dim)
    self.init_ext = tf.keras.layers.Conv1DTranspose(filters=embed_dim, kernel_size=init_knl, strides=strt_dim)
    self.chords_extend = transformer.Transformer(
        embed_dim=embed_dim, n_heads=n_heads, encoder_layers=encoder_layers, decoder_layers=decoder_layers,
        fc_layers=fc_layers, norm_epsilon=norm_epsilon, dropout_rate=transformer_dropout_rate,
        fc_activation=fc_activation)
    self.b_fc = tf.keras.layers.Dense(embed_dim, kernel_initializer='he_normal', bias_initializer='zeros')
    self.g_fc = tf.keras.layers.Dense(embed_dim, kernel_initializer='he_normal', bias_initializer='ones')
    self.noise_en_fc = tf.keras.layers.Dense(
        embed_dim, kernel_initializer='he_normal', bias_initializer='zeros')
    self.noise_de_fc = tf.keras.layers.Dense(
        embed_dim, kernel_initializer='he_normal', bias_initializer='zeros')
    self.pos_enc = util.positional_encoding(max_pos, embed_dim)
    self.pos_comb_conv1 = tf.keras.layers.Conv1D(kernel_size=pos_conv_knl, filters=embed_dim, padding='same')
def run_model(config, vocab_enc, vocab_dec, vocab_de_bw, file):
    model = transformer.Transformer(config)
    model.to(config.device)
    model.load_state_dict(torch.load(file))

    sys.stdout.write("enter: ")
    sys.stdout.flush()
    for line in fileinput.input():
        line = line.strip()
        if 0 < len(line):
            exec_model(config, vocab_enc, vocab_dec, vocab_de_bw, model, line)
        sys.stdout.write("enter: ")
        sys.stdout.flush()
def MakeTransformer(cols_to_train, fixed_ordering, seed=None):
    return transformer.Transformer(
        num_blocks=args.blocks,
        d_model=args.dmodel,
        d_ff=args.dff,
        num_heads=args.heads,
        nin=len(cols_to_train),
        input_bins=[c.DistributionSize() for c in cols_to_train],
        use_positional_embs=True,
        activation=args.transformer_act,
        fixed_ordering=fixed_ordering,
        column_masking=args.column_masking,
        seed=seed,
    ).to(DEVICE)
def main():
    # create an embedding matrix + randomly sample input as well as target sequence
    emb = nn.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)
    input_seq = torch.from_numpy(
        np.random.randint(1, VOCAB_SIZE - 1, (INPUT_LEN, BATCH_SIZE)))
    target_seq = torch.from_numpy(
        np.random.randint(1, VOCAB_SIZE - 1, (INPUT_LEN, BATCH_SIZE)))
    # -> we assume that index 0 is the <PAD> token

    # create the models being compared
    recurrent_model = EncDecWithAttn(emb, HIDDEN_SIZE)
    transformer_model = transformer.Transformer(
        emb,                 # text_emb
        0,                   # pad_index
        emb.num_embeddings,  # output_size
        max_seq_len=INPUT_LEN,
        dim_model=HIDDEN_SIZE,
        num_layers=1)

    # move everything to the GPU, if possible
    if GPU:
        input_seq = input_seq.cuda()
        target_seq = target_seq.cuda()
        emb.cuda()
        recurrent_model.cuda()
        transformer_model.cuda()

    # measure how long it takes the recurrent model to process the data
    print("Testing the recurrent attention-based encoder-decoder model...")
    times = []
    for idx in range(NUM_RUNS):
        start = time.time()
        recurrent_model(input_seq, target_seq)
        times.append(time.time() - start)
        print("Run {} finished in {:.3f}s".format(idx + 1, times[-1]))
    print("Avg. duration: {:.3f}s\n".format(np.mean(times)))

    # flip the first two dimensions of the data, as the transformer expects the first dimension to be the batch
    input_seq = input_seq.transpose(0, 1)
    target_seq = target_seq.transpose(0, 1)

    # measure how long it takes the transformer model to process the data
    print("Testing the transformer model...")
    times = []
    for idx in range(NUM_RUNS):
        start = time.time()
        transformer_model(input_seq, target_seq)
        times.append(time.time() - start)
        print("Run {} finished in {:.3f}s".format(idx + 1, times[-1]))
    print("Avg. duration: {:.3f}s".format(np.mean(times)))
def execute(self):
    config = configparser.ConfigParser()
    config.read('Config')
    stagingpath = config.get('ConnectionString', 'stagingDbTransformer')
    nalandapath = config.get('ConnectionString', 'nalandaDbTransformer')

    # establish sink connection
    sinkDbEngine = create_engine(nalandapath)
    sinkConnection = sinkDbEngine.connect()
    date = sinkConnection.execute(
        'select max(latest_date) from account_latestfetchdate').fetchall()[0]
    if date is None or date[0] is None:
        start_date = datetime.date(datetime.MINYEAR, 1, 1)
    else:
        start_date = date[0]

    logging.basicConfig(filename='Fetcher.log', level=logging.INFO)
    logging.info('Transformation started !')
    transformerObj = transformer.Transformer(stagingpath, nalandapath)
    transformerObj.sync_student_info()
    transformerObj.sync_class_info()
    transformerObj.sync_school_info()
    transformerObj.sync_content()
    transformerObj.completed_questions_aggregation_student(start_date)
    transformerObj.exercise_mastered_by_student(start_date)
    transformerObj.exercise_attempts_by_students(start_date)
    transformerObj.correct_questions_aggregation_student(start_date)
    transformerObj.attempted_questions_aggregation_student(start_date)
    transformerObj.mastery_level_aggregation_class(start_date)
    transformerObj.mastery_level_aggregation_school(start_date)
    transformerObj.exam_matrics(start_date)
    transformerObj.lesson_result(start_date)
    transformerObj.user_session_student(start_date)
    transformerObj.user_session_aggregation_class(start_date)
    transformerObj.user_session_aggregation_school(start_date)
    transformerObj.clear_resource()
    # sinkConnection.execute('insert into account_latestfetchdate(latest_date) values(NOW())')
    logging.info('Transformation completed !')
def transformer_plumber():
    start_date = "2017-07-10 00:00:00"
    test = transformer.Transformer(
        "mysql+mysqlconnector://root:@localhost/stagingtest",
        "mysql+mysqlconnector://root:@localhost/nalandatest")
    test.sync_student_info()
    test.sync_class_info()
    test.sync_school_info()
    test.sync_content()
    test.clear_log(start_date)
    test.completed_questions_aggregation_student(start_date)
    test.correct_questions_aggregation_student(start_date)
    test.attempted_questions_aggregation_student(start_date)
    test.completed_student(start_date)
    test.mastery_level_aggregation_class(start_date)
    test.mastery_level_aggregation_school(start_date)
    test.clear_resource()
def build_model(arch, src_vocab_size, tgt_vocab_size, embedding_dim, fcn_hidden_dim, num_heads, num_layers,
                dropout, src_to_tgt_vocab_conversion_matrix):
    """Builds model."""
    if arch == "transformer":
        model = transformer.Transformer(
            src_vocab_size=src_vocab_size, tgt_vocab_size=tgt_vocab_size, embedding_dim=embedding_dim,
            fcn_hidden_dim=fcn_hidden_dim, num_heads=num_heads, num_layers=num_layers, dropout=dropout)
    else:
        model = pointer_generator.PointerGeneratorTransformer(
            src_vocab_size=src_vocab_size, tgt_vocab_size=tgt_vocab_size,
            src_to_tgt_vocab_conversion_matrix=src_to_tgt_vocab_conversion_matrix,
            embedding_dim=embedding_dim, fcn_hidden_dim=fcn_hidden_dim,
            num_heads=num_heads, num_layers=num_layers, dropout=dropout)
    return model
def test_fixed_split_dims(self):
    k_max = 3
    occurs = {"C": 7, "H": 9, "O": 4}
    species = get_species(occurs)
    kbody_terms = sorted(
        list(set(["".join(sorted(c)) for c in combinations(species, k_max)])))
    split_dims = []
    for kbody_term in kbody_terms:
        counter = Counter(get_atoms_from_kbody_term(kbody_term))
        dims = [comb(occurs[e], k, True) for e, k in counter.items()]
        split_dims.append(np.prod(dims))
    split_dims = [int(x) for x in split_dims]
    clf = transformer.Transformer(species=get_species({"C": 2, "H": 4}),
                                  k_max=k_max,
                                  kbody_terms=kbody_terms,
                                  split_dims=split_dims)
    self.assertListEqual(clf.kbody_sizes, [0, 4, 0, 12, 0, 0, 4, 0, 0, 0])
    self.assertAlmostEqual(clf.binary_weights.sum(), 20.0, delta=0.0001)
    coords = get_example(6)
    features, _, _ = clf.transform(Atoms(clf.species, coords))
    offsets = [0] + np.cumsum(clf.split_dims).tolist()
    selections = clf.kbody_selections
    self.assertEqual(features.shape[0], comb(20, k_max, exact=True))
    ccc = features[offsets[0]:offsets[1], :]
    self.assertAlmostEqual(np.sum(ccc), 0.0, delta=epsilon)
    cch = features[offsets[1]:offsets[2], :]
    dists = pairwise_distances(coords[selections['CCH'][0]])
    dists = dists[[0, 0, 1], [1, 2, 2]]
    lmat = [1.5, 1.07, 1.07]
    vsum = np.exp(-dists / np.asarray(lmat)).sum()
    self.assertAlmostEqual(vsum, cch[0].sum(), delta=epsilon)
    cco = features[offsets[2]:offsets[3], :]
    self.assertAlmostEqual(np.sum(cco), 0.0, delta=epsilon)
def __init__(self, observation_space_size, action_space_size):
    self._hyperparameters = hyperparameters.Hyperparameters()
    self._epsilon_decay_process = annealing_processes.EpsilonDecayProcess(
        self._hyperparameters.max_epsilon, self._hyperparameters.min_epsilon,
        self._hyperparameters.epsilon_decay_steps)
    self._replay_memory = memories.ReplayMemory(
        self._hyperparameters.memory_capacity, observation_space_size)
    self._transformer = transformer.Transformer(
        self._hyperparameters.device)
    self._online_network = neural_networks.DQN(
        observation_space_size, action_space_size,
        self._hyperparameters.learning_rate, self._hyperparameters.device)
    self._target_network = neural_networks.DQN(
        observation_space_size, action_space_size,
        self._hyperparameters.learning_rate, self._hyperparameters.device)
    self._target_network.eval()
    self._update_target_network()
    self._action_space_size = action_space_size
    self._step_counter = 0
def test_simple(self):
    coords = get_example(21)
    species = get_species({"Ta": 1, "B": 20})
    k_max = 4
    clf = transformer.Transformer(species, k_max=k_max)
    shape = clf.shape
    self.assertEqual(clf.ck2, comb(k_max, 2, exact=True))
    self.assertListEqual(clf.split_dims, [4845, 1140])
    self.assertListEqual(clf.kbody_sizes, clf.split_dims)
    self.assertAlmostEqual(clf.binary_weights.sum(), float(shape[0]), delta=0.0001)
    features, _, _ = clf.transform(Atoms(species, coords))
    self.assertTupleEqual(features.shape, (5985, 6))
    orders = np.argsort(features[0, :]).tolist()
    self.assertListEqual(orders, list(range(6)))
    orders = np.argsort(features[-1, [2, 4, 5]]).tolist()
    self.assertListEqual(orders, list(range(3)))
    orders = np.argsort(features[-1, [0, 1, 3]]).tolist()
    self.assertListEqual(orders, list(range(3)))
def test_fixed_kbody_terms(self):
    species = get_species({"C": 7, "H": 9, "N": 3})
    k_max = 3
    kbody_terms = sorted(
        list(set(["".join(sorted(c)) for c in combinations(species, k_max)])))
    num_terms = len(kbody_terms)
    coords = get_example(5)
    clf = transformer.Transformer(species=get_species({"C": 1, "H": 4}),
                                  k_max=k_max,
                                  kbody_terms=kbody_terms)
    self.assertEqual(len(clf.split_dims), num_terms)
    self.assertEqual(len(clf.kbody_terms), num_terms)
    features, _, _ = clf.transform(Atoms(clf.species, coords))
    self.assertTupleEqual(features.shape, (18, 3))
    self.assertListEqual(clf.split_dims, [1, 1, 1, 6, 1, 1, 4, 1, 1, 1])
    self.assertAlmostEqual(np.sum(features[0:3, :]), 0.0, delta=epsilon)
    d12 = np.linalg.norm(coords[1, :] - coords[2, :])
    s12 = 0.64
    self.assertAlmostEqual(features[3, 2], np.exp(-d12 / s12), delta=epsilon)
    selection = clf.kbody_selections['CHH']
    self.assertEqual(len(selection), comb(4, 2, exact=True))
    self.assertListEqual(selection[0], [0, 1, 2])
    self.assertListEqual(selection[1], [0, 1, 3])
    self.assertListEqual(selection[2], [0, 1, 4])
    self.assertListEqual(selection[3], [0, 2, 3])
    self.assertListEqual(selection[4], [0, 2, 4])
    self.assertListEqual(selection[5], [0, 3, 4])
def predict(self, is_training):
    # initializer = tf.variance_scaling_initializer(
    #     self.params["initializer_gain"], mode="fan_avg", distribution="uniform")
    with tf.variable_scope("tf_inference", reuse=tf.AUTO_REUSE):
        self.transformer = transformer.Transformer(self.config, is_training)
        self.attention_bias = model_utils.get_decoder_self_attention_bias(self.max_len)
        encoder_outputs = self.transformer.encode(self.inp, self.attention_bias)
        logits = self.transformer.embedding_softmax_layer.linear(encoder_outputs)
        loss = model_utils.soft_cross_entropy_loss(
            logits, self.inp, self.config['label_smoothing'], self.config['vocab_size'])
        weights = tf.sequence_mask(self.inp_len, self.max_len, dtype=tf.int32)
        loss = loss * tf.to_float(weights)
        loss = tf.reduce_sum(loss, axis=1)
        loss = loss / tf.to_float(self.inp_len)
        return loss
def test_ghost(self):
    species = get_species({"C": 7, "H": 9, "N": 3, "X": 1})
    k_max = 3
    kbody_terms = sorted(
        list(set(["".join(sorted(c)) for c in combinations(species, k_max)])))
    coords = np.array([[0.15625000, 1.42857141, 0.00000000],
                       [0.51290443, 0.41976140, 0.00000000],
                       [0.51292284, 1.93296960, 0.87365150],
                       [0.51292284, 1.93296960, -0.87365150],
                       [-0.91375000, 1.42858459, 0.00000000]],
                      dtype=np.float64)
    clf = transformer.Transformer(species=get_species({"C": 1, "H": 4, "X": 1}),
                                  k_max=k_max,
                                  kbody_terms=kbody_terms)
    features, _, _ = clf.transform(Atoms(clf.species, coords))
    # 4CH + 6HH + 6CHH + 4HHH + (10 - 2) + (6 - 2) = 32
    self.assertTupleEqual(features.shape, (32, 3))
def MakeModel(self, table, train_data, table_primary_index=None):
    cols_to_train = table.columns
    if self.factorize:
        cols_to_train = train_data.columns
    fixed_ordering = self.MakeOrdering(table)

    table_num_columns = table_column_types = table_indexes = None
    if isinstance(train_data, (common.SamplerBasedIterDataset,
                               common.FactorizedSampleFromJoinIterDataset)):
        table_num_columns = train_data.table_num_columns
        table_column_types = train_data.combined_columns_types
        table_indexes = train_data.table_indexes
        print('table_num_columns', table_num_columns)
        print('table_column_types', table_column_types)
        print('table_indexes', table_indexes)
        print('table_primary_index', table_primary_index)

    if self.use_transformer:
        args = {
            'num_blocks': 4,
            # 'd_ff': 128, 'd_model': 32, 'num_heads': 4,
            'd_ff': 64,
            'd_model': 16,
            'num_heads': 2,
            'nin': len(cols_to_train),
            'input_bins': [c.distribution_size for c in cols_to_train],
            'use_positional_embs': False,
            'activation': 'gelu',
            'fixed_ordering': self.fixed_ordering,
            'dropout': self.dropout,
            'per_row_dropout': self.per_row_dropout,
            'seed': None,
            'join_args': {
                'num_joined_tables': len(self.join_tables),
                'table_dropout': self.table_dropout,
                'table_num_columns': table_num_columns,
                'table_column_types': table_column_types,
                'table_indexes': table_indexes,
                'table_primary_index': table_primary_index,
            }
        }
        args.update(self.transformer_args)
        model = transformer.Transformer(**args).to(train_utils.get_device())
    else:
        model = MakeMade(
            table=table,
            scale=self.fc_hiddens,
            layers=self.layers,
            cols_to_train=cols_to_train,
            seed=self.seed,
            factor_table=train_data if self.factorize else None,
            fixed_ordering=fixed_ordering,
            special_orders=self.special_orders,
            order_content_only=self.order_content_only,
            order_indicators_at_front=self.order_indicators_at_front,
            inv_order=True,
            residual=self.residual,
            direct_io=self.direct_io,
            input_encoding=self.input_encoding,
            output_encoding=self.output_encoding,
            embed_size=self.embed_size,
            dropout=self.dropout,
            per_row_dropout=self.per_row_dropout,
            grouped_dropout=self.grouped_dropout if self.factorize else False,
            fixed_dropout_ratio=self.fixed_dropout_ratio,
            input_no_emb_if_leq=self.input_no_emb_if_leq,
            embs_tied=self.embs_tied,
            resmade_drop_prob=self.resmade_drop_prob,
            # DMoL:
            num_dmol=self.num_dmol,
            scale_input=self.scale_input if self.num_dmol else False,
            dmol_cols=self.dmol_cols if self.num_dmol else [],
            # Join specific:
            num_joined_tables=len(self.join_tables),
            table_dropout=self.table_dropout,
            table_num_columns=table_num_columns,
            table_column_types=table_column_types,
            table_indexes=table_indexes,
            table_primary_index=table_primary_index,
        )
    return model
test_data = nsmc_dataset(ko_spm=ko_spm, nsmc_path=nsmc_test_path)
test_data_loader = DataLoader(test_data,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=4,
                              drop_last=True)

# model setting
model = transformer.Transformer(n_src_vocab=ko_vocab_size,
                                n_trg_vocab=None,
                                src_pad_idx=0,
                                trg_pad_idx=0,
                                d_word_vec=128,
                                d_model=128,
                                d_inner=512,
                                n_layers=3,
                                n_head=4,
                                d_k=32,
                                d_v=32,
                                dropout=0.1,
                                n_position=256,
                                trg_emb_prj_weight_sharing=True,
                                emb_src_trg_weight_sharing=True)
model = torch.nn.DataParallel(model, device_ids=[0]).cuda()

finetuning_model = binary_classification(bart_model=model, freeze_bart=False, vocab_size=ko_vocab_size).cuda()
optimizer = torch.optim.Adam([{
m = 3
lr = 0.00035
context = 150
batch_size = 32
log_interval = 50

criterion = nn.NLLLoss()

root = "data/wikitext-2"
train_data = dataset.WikiText2(root, context, dataset.DatasetSplit.train)
valid_data = dataset.WikiText2(root, context, dataset.DatasetSplit.valid)

model = transformer.Transformer(context, train_data.word_count(), 400, 40, 900, heads, layers,
                                tied_weights=True).to(device)
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)


def evaluate(data):
    model.eval()
    with torch.no_grad():
        loss = 0.0
        loader = torch.utils.data.DataLoader(dataset=data, batch_size=batch_size,
        logits, targets, params["label_smoothing"], params["vocab_size_output"])
    # assert xentropy.shape == weights.shape == (params['default_batch_size'], params['max_length_output'])
    # take the mean; padded positions are excluded from the calculation
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)
    return loss


# init data and dict
print("Load data...")
dataset, inp_lang, targ_lang, input_val, target_val = load_data.prepare_tfdata("data/eng-spa.txt")

# init model
model = transformer.Transformer(params, train=False)

# Test
test_inputs = input_val[:params["default_batch_size"]]
test_targets = target_val[:params["default_batch_size"]]
test_logits = model(test_inputs, test_targets)
print("TEST:", test_logits.shape)

# Load weights
model.load_weights(PATH)
print("load weights.")

# EVAL
for i in range(6):
    print("----------\n Eval: ", i)
    eval_loss = compute_loss(m=model,
def test__is_terminal_element_failure(self):
    self.assertFalse(transformer.Transformer()._is_terminal_element(['1', '2', '3'], '1'))
def test_nested_transformer_example_given(self):
    input_list = [{
        'country': 'US',
        'city': 'Boston',
        'currency': 'USD',
        'amount': 100
    }, {
        'country': 'FR',
        'city': 'Paris',
        'currency': 'EUR',
        'amount': 20
    }, {
        'country': 'FR',
        'city': 'Lyon',
        'currency': 'EUR',
        'amount': 11.4
    }, {
        'country': 'ES',
        'city': 'Madrid',
        'currency': 'EUR',
        'amount': 8.9
    }, {
        'country': 'UK',
        'city': 'London',
        'currency': 'GBP',
        'amount': 12.2
    }, {
        'country': 'UK',
        'city': 'London',
        'currency': 'FBP',
        'amount': 10.9
    }]
    nest_keys = ['currency', 'country', 'city']
    res = {
        'USD': {'US': {'Boston': [{'amount': 100}]}},
        'EUR': {
            'FR': {'Paris': [{'amount': 20}], 'Lyon': [{'amount': 11.4}]},
            'ES': {'Madrid': [{'amount': 8.9}]}
        },
        'GBP': {'UK': {'London': [{'amount': 12.2}]}},
        'FBP': {'UK': {'London': [{'amount': 10.9}]}}
    }
    self.assertEqual(
        transformer.Transformer().nested_transformer(input_list, nest_keys), res)