def init_cfg_training(self, grammar=None):
    """Set up the oracle, generator, discriminator and data loaders for CFG training.

    An ``OracleCfg`` is built from ``grammar``, registered through
    ``set_oracle`` and asked to generate its oracle. ``self.vocab_size`` is
    then set to the oracle's vocabulary size plus one, and the generator,
    discriminator and the three data loaders are constructed from the
    instance's hyper-parameters and registered on ``self``.

    Args:
        grammar: Forwarded unchanged to ``OracleCfg`` as ``cfg_grammar``.

    Returns:
        The tuple ``(oracle.wi_dict, oracle.iw_dict)``. Presumably the
        word-to-index and index-to-word mappings -- confirm against
        ``OracleCfg``.
    """
    # Function-scoped import, kept local as in the original code.
    from utils.oracle.OracleCfg import OracleCfg

    oracle = OracleCfg(sequence_length=self.sequence_length, cfg_grammar=grammar)
    self.set_oracle(oracle)
    self.oracle.generate_oracle()
    # NOTE(review): the "+ 1" presumably reserves one extra token id on top
    # of the oracle vocabulary -- confirm against OracleCfg.
    self.vocab_size = self.oracle.vocab_size + 1

    generator = Generator(
        num_vocabulary=self.vocab_size,
        batch_size=self.batch_size,
        emb_dim=self.emb_dim,
        hidden_dim=self.hidden_dim,
        sequence_length=self.sequence_length,
        start_token=self.start_token,
    )
    self.set_generator(generator)

    # batch_size is passed explicitly so this discriminator is configured the
    # same way as in init_real_trainng / init_oracle_trainng, which both
    # forward self.batch_size to the same constructor.
    discriminator = Discriminator(
        sequence_length=self.sequence_length,
        num_classes=2,
        vocab_size=self.vocab_size,
        emd_dim=self.emb_dim,
        filter_sizes=self.filter_size,
        num_filters=self.num_filters,
        batch_size=self.batch_size,
        l2_reg_lambda=self.l2_reg_lambda,
    )
    self.set_discriminator(discriminator)

    gen_dataloader = DataLoader(batch_size=self.batch_size, seq_length=self.sequence_length)
    oracle_dataloader = DataLoader(batch_size=self.batch_size, seq_length=self.sequence_length)
    dis_dataloader = DisDataloader(batch_size=self.batch_size, seq_length=self.sequence_length)
    self.set_data_loader(
        gen_loader=gen_dataloader,
        dis_loader=dis_dataloader,
        oracle_loader=oracle_dataloader,
    )
    return oracle.wi_dict, oracle.iw_dict
def init_real_trainng(self, data_loc=None):
    """Set up the generator, discriminator and data loaders for real-text training.

    The corpus at ``data_loc`` is run through ``text_precess`` to obtain
    ``self.sequence_length`` and ``self.vocab_size``; the models and loaders
    are then built from those values. No oracle loader is used on this path
    (``oracle_loader=None``). Finally the corpus is tokenized, encoded with
    ``text_to_code`` and written to ``self.oracle_file``.

    Args:
        data_loc: Path to the training text. When ``None``,
            ``'data/image_coco.txt'`` is used.

    Returns:
        The pair ``(word_index_dict, index_word_dict)`` produced by
        ``get_dict``.
    """
    # Function-scoped import, kept local as in the original code.
    from utils.text_process import (
        get_dict,
        get_tokenlized,
        get_word_list,
        text_precess,
        text_to_code,
    )

    if data_loc is None:
        data_loc = 'data/image_coco.txt'
    self.sequence_length, self.vocab_size = text_precess(data_loc)

    self.set_generator(
        Generator(
            num_vocabulary=self.vocab_size,
            batch_size=self.batch_size,
            emb_dim=self.emb_dim,
            hidden_dim=self.hidden_dim,
            sequence_length=self.sequence_length,
            start_token=self.start_token,
        )
    )

    self.set_discriminator(
        Discriminator(
            sequence_length=self.sequence_length,
            num_classes=2,
            vocab_size=self.vocab_size,
            emd_dim=self.emb_dim,
            filter_sizes=self.filter_size,
            num_filters=self.num_filters,
            batch_size=self.batch_size,
            l2_reg_lambda=self.l2_reg_lambda,
        )
    )

    # Generator-side loader first, then the discriminator-side one, matching
    # the original construction order.
    real_loader = DataLoader(batch_size=self.batch_size, seq_length=self.sequence_length)
    disc_loader = DisDataloader(batch_size=self.batch_size, seq_length=self.sequence_length)
    self.set_data_loader(gen_loader=real_loader, dis_loader=disc_loader, oracle_loader=None)

    corpus_tokens = get_tokenlized(data_loc)
    vocabulary = get_word_list(corpus_tokens)
    word_index_dict, index_word_dict = get_dict(vocabulary)

    # Persist the encoded corpus in the file referred to by self.oracle_file.
    with open(self.oracle_file, 'w') as outfile:
        outfile.write(text_to_code(corpus_tokens, word_index_dict, self.sequence_length))
    return word_index_dict, index_word_dict
def init_oracle_trainng(self, oracle=None):
    """Set up the oracle, generator, discriminator and data loaders for oracle training.

    Args:
        oracle: Oracle model to register through ``set_oracle``. When
            ``None``, an ``OracleLstm`` is built from the instance's
            hyper-parameters and registered instead.

    Returns:
        None. The constructed objects are registered on ``self`` through the
        ``set_*`` methods.
    """
    if oracle is None:
        oracle = OracleLstm(
            num_vocabulary=self.vocab_size,
            batch_size=self.batch_size,
            emb_dim=self.emb_dim,
            hidden_dim=self.hidden_dim,
            sequence_length=self.sequence_length,
            start_token=self.start_token,
        )
    # Registered whether the oracle was supplied by the caller or built above.
    self.set_oracle(oracle)

    self.set_generator(
        Generator(
            num_vocabulary=self.vocab_size,
            batch_size=self.batch_size,
            emb_dim=self.emb_dim,
            hidden_dim=self.hidden_dim,
            sequence_length=self.sequence_length,
            start_token=self.start_token,
        )
    )

    self.set_discriminator(
        Discriminator(
            sequence_length=self.sequence_length,
            num_classes=2,
            vocab_size=self.vocab_size,
            emd_dim=self.emb_dim,
            filter_sizes=self.filter_size,
            num_filters=self.num_filters,
            batch_size=self.batch_size,
            l2_reg_lambda=self.l2_reg_lambda,
        )
    )

    # All three loaders share the same configuration; they are constructed in
    # the original order (generator, oracle, discriminator).
    loader_config = {
        'batch_size': self.batch_size,
        'seq_length': self.sequence_length,
    }
    sample_loader = DataLoader(**loader_config)
    reference_loader = DataLoader(**loader_config)
    disc_loader = DisDataloader(**loader_config)
    self.set_data_loader(
        gen_loader=sample_loader,
        dis_loader=disc_loader,
        oracle_loader=reference_loader,
    )