def construct_dir(self):
    """Derive model/summary/result output paths from CLI args and create the directories.

    Sets ``self.model_dir``, ``self.summary_dir`` and ``self.out_path`` (a flat
    ``.txt`` result file named after the run configuration).
    """
    self.model_dir = utils.construct_dir(
        prefix=self.args.model_dir, args=self.args, create_dataset_name=False)
    self.summary_dir = utils.construct_dir(
        prefix=self.args.summary_dir, args=self.args, create_dataset_name=False)
    self.out_path = utils.construct_dir(
        prefix=self.args.result_dir, args=self.args, create_dataset_name=False) + '.txt'
    # exist_ok=True avoids the check-then-create (TOCTOU) race of the
    # os.path.exists + os.makedirs pattern and is a no-op when the dir exists.
    os.makedirs(self.args.result_dir, exist_ok=True)
    os.makedirs(self.model_dir, exist_ok=True)
    os.makedirs(self.summary_dir, exist_ok=True)
def construct_out_dir(self):
    """Build the per-run result tree (models/, result.txt), the summary dir, and a writer.

    Sets ``self.result_dir``, ``self.model_dir``, ``self.out_path``,
    ``self.summary_dir``, ``self.image_dir`` and ``self.writer``.
    """
    self.result_dir = utils.construct_dir(prefix=self.args.result_dir, args=self.args)
    self.model_dir = os.path.join(self.result_dir, 'models')
    self.out_path = os.path.join(self.result_dir, 'result.txt')
    self.summary_dir = utils.construct_dir(prefix=self.args.summary_dir, args=self.args)
    # NOTE(review): image_dir is computed but never created here — presumably a
    # consumer creates it on demand; confirm before relying on its existence.
    self.image_dir = utils.construct_dir(prefix='images', args=self.args)
    # exist_ok=True replaces the racy exists()+makedirs() pattern.
    os.makedirs(self.summary_dir, exist_ok=True)
    self.writer = SummaryWriter(log_dir=self.summary_dir)
    # makedirs also creates result_dir implicitly, since model_dir nests inside it.
    os.makedirs(self.model_dir, exist_ok=True)
def construct_data(self):
    """Load the cached dataset pickle, or build it, cache it and exit.

    On a cache hit the pickled ``text_data`` is loaded and wrapped into
    train/val/test DataLoaders; on a miss the dataset object is built
    (imdb/agnews), pickled to disk, and the process exits so a fresh run
    can pick up the cache.
    """
    self.data_dir = os.path.join(self.args.data_dir, self.args.dataset)
    self.dataset_name = utils.construct_dir(
        prefix=self.args.dataset, args=self.args, create_dataset_name=True)
    cache_path = os.path.join(self.data_dir, self.dataset_name)

    if os.path.exists(cache_path):
        # Cache hit: deserialize the previously built dataset object.
        with open(cache_path, 'rb') as datasetFile:
            self.text_data = p.load(datasetFile)
        print('dataset loaded from {}'.format(cache_path))
    else:
        # Cache miss: build the dataset object for the requested corpus.
        if self.args.dataset == 'imdb':
            self.text_data = IMDBData(args=self.args)
        elif self.args.dataset == 'agnews':
            self.text_data = AGNewsData(args=self.args)
        else:
            print('Cannot recognize {}'.format(self.args.dataset))
            raise NotImplementedError
        with open(cache_path, 'wb') as datasetFile:
            p.dump(self.text_data, datasetFile)
        print('dataset created and saved to {}, exiting ...'.format(cache_path))
        exit(0)

    # Materialize the splits and wrap them in loaders; only training shuffles.
    self.text_data.construct_dataset(max_steps=self.args.max_steps)
    self.train_loader = DataLoader(dataset=self.text_data.training_set_all,
                                   num_workers=self.args.num_worker,
                                   batch_size=self.args.batch_size,
                                   shuffle=True)
    self.val_loader = DataLoader(dataset=self.text_data.val_set,
                                 num_workers=self.args.num_worker,
                                 batch_size=self.args.batch_size,
                                 shuffle=False)
    self.test_loader = DataLoader(dataset=self.text_data.test_set,
                                  num_workers=self.args.num_worker,
                                  batch_size=self.args.batch_size,
                                  shuffle=False)
def construct_out_dir(self):
    """Derive model/test output dirs and the result file path, creating the dirs.

    Sets ``self.model_dir``, ``self.out_dir``, ``self.result_file`` and
    ``self.out_path``.
    """
    self.model_dir = utils.construct_dir(
        prefix=self.args.model_dir, args=self.args, create_dataset_name=False)
    self.out_dir = utils.construct_dir(
        prefix=self.args.test_dir, args=self.args, create_dataset_name=False)
    # os.path.basename instead of split('/')[-1]: portable across path
    # separators (the hand-rolled split breaks on Windows '\\' paths).
    self.result_file = os.path.basename(self.model_dir)
    self.out_path = os.path.join(self.args.result_dir, self.result_file)
    # exist_ok=True replaces the racy exists()+makedirs() pattern.
    os.makedirs(self.args.result_dir, exist_ok=True)
    os.makedirs(self.model_dir, exist_ok=True)
    os.makedirs(self.out_dir, exist_ok=True)
def construct_out_dir(self):
    """Build the per-run result tree: images/, models/ and result.txt path.

    Sets ``self.result_dir``, ``self.out_image_dir``, ``self.model_dir`` and
    ``self.out_path``; creates the image and model directories (which also
    creates ``result_dir`` itself, since both nest inside it).
    """
    self.result_dir = utils.construct_dir(prefix=self.args.result_dir, args=self.args)
    self.out_image_dir = os.path.join(self.result_dir, 'images')
    self.model_dir = os.path.join(self.result_dir, 'models')
    self.out_path = os.path.join(self.result_dir, 'result.txt')
    # exist_ok=True replaces the racy exists()+makedirs() pattern.
    os.makedirs(self.out_image_dir, exist_ok=True)
    os.makedirs(self.model_dir, exist_ok=True)
def construct_data(self):
    """Load the cached dataset pickle, or build it, cache it and exit.

    On a cache hit the pickled ``text_data`` is loaded and wrapped into
    train/val/test DataLoaders; on a miss the dataset object is built
    (rotten/congress), pickled to disk, and the process exits so a fresh
    run can pick up the cache.
    """
    self.data_dir = os.path.join(self.args.data_dir, self.args.dataset)
    self.dataset_name = utils.construct_dir(
        prefix=self.args.dataset, args=self.args, create_dataset_name=True)
    cache_path = os.path.join(self.data_dir, self.dataset_name)

    if os.path.exists(cache_path):
        # Cache hit: deserialize the previously built dataset object.
        with open(cache_path, 'rb') as datasetFile:
            self.text_data = p.load(datasetFile)
        print('dataset loaded from {}'.format(cache_path))
    else:
        # Cache miss: build the dataset object for the requested corpus.
        if self.args.dataset == 'rotten':
            self.text_data = RottenData(args=self.args)
        elif self.args.dataset == 'congress':
            self.text_data = CongressData(args=self.args)
        else:
            print('Cannot recognize {}'.format(self.args.dataset))
            raise NotImplementedError
        with open(cache_path, 'wb') as datasetFile:
            p.dump(self.text_data, datasetFile)
        print('dataset created and saved to {}, exiting ...'.format(
            cache_path))
        exit(0)

    # Materialize the splits; note nothing is shuffled here, and val/test use
    # the dedicated test batch size.
    self.text_data.construct_dataset(elmo=self.args.elmo)
    self.train_loader = DataLoader(dataset=self.text_data.training_dataset,
                                   num_workers=1,
                                   batch_size=self.args.batch_size,
                                   shuffle=False)
    self.val_loader = DataLoader(dataset=self.text_data.val_dataset,
                                 num_workers=1,
                                 batch_size=self.args.test_batch_size,
                                 shuffle=False)
    self.test_loader = DataLoader(dataset=self.text_data.test_dataset,
                                  num_workers=1,
                                  batch_size=self.args.test_batch_size,
                                  shuffle=False)
def compute_dist(self):
    """Compute word-distance tables over the vocabulary and pickle them to disk.

    Delegates to the module-level ``compute_dist`` helper with the
    counter-fitted embedding file, then serializes the result to a path
    derived from the dataset name and args.
    """
    self.vocab = self.text_data.get_vocab()
    # The call below resolves to the module-level helper, not this method.
    distances = compute_dist(vocab=self.vocab,
                             word2id=self.text_data.word2id,
                             id2word=self.text_data.id2word,
                             embedding_file='counter-fitted-vectors.txt')
    out_name = utils.construct_dir(prefix=self.args.dataset,
                                   args=self.args,
                                   create_dist_name=True)
    with open(out_name, 'wb') as dist_file:
        p.dump(distances, dist_file)