Example #1
    def construct_dir(self):
        # Derive run-specific model/summary/result paths from the parsed arguments.
        self.model_dir = utils.construct_dir(
            prefix=self.args.model_dir, args=self.args, create_dataset_name=False)
        self.summary_dir = utils.construct_dir(
            prefix=self.args.summary_dir, args=self.args, create_dataset_name=False)
        self.out_path = utils.construct_dir(
            prefix=self.args.result_dir, args=self.args, create_dataset_name=False) + '.txt'

        # Create the output directories if they do not exist yet.
        if not os.path.exists(self.args.result_dir):
            os.makedirs(self.args.result_dir)

        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)

        if not os.path.exists(self.summary_dir):
            os.makedirs(self.summary_dir)
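All of these examples call a project-local helper, utils.construct_dir, that turns a prefix plus the parsed command-line arguments into a run-specific path. The helper itself is not shown on this page; the sketch below is only a guess at its shape, inferred from the call sites (a prefix, the args namespace, and optional create_dataset_name / create_dist_name flags). The argument fields and suffixes used here are assumptions, not the project's actual implementation.

import os

def construct_dir(prefix, args, create_dataset_name=False, create_dist_name=False):
    # Hypothetical reconstruction: fold a few argument values into a
    # configuration-specific tag so that every run gets its own path.
    tag = 'bs{}'.format(args.batch_size)  # assumed field on args; real code likely uses more
    if create_dataset_name or create_dist_name:
        # Used as a file name (cached dataset or distance matrix) rather than a directory.
        suffix = '.dist' if create_dist_name else '.pkl'  # assumed extensions
        return '{}_{}{}'.format(prefix, tag, suffix)
    return os.path.join(prefix, tag)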
Example #2
    def construct_out_dir(self):
        self.result_dir = utils.construct_dir(prefix=self.args.result_dir,
                                              args=self.args)
        self.model_dir = os.path.join(self.result_dir, 'models')
        self.out_path = os.path.join(self.result_dir, 'result.txt')
        self.summary_dir = utils.construct_dir(prefix=self.args.summary_dir,
                                               args=self.args)
        self.image_dir = utils.construct_dir(prefix='images', args=self.args)

        if not os.path.exists(self.summary_dir):
            os.makedirs(self.summary_dir)
        self.writer = SummaryWriter(log_dir=self.summary_dir)

        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
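Example #2 additionally opens a TensorBoard SummaryWriter on the summary directory. A minimal, self-contained usage sketch of such a writer is below; the import path assumes torch.utils.tensorboard (older projects use tensorboardX, whose interface is the same), and the logged values are placeholders.

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/example')      # any writable directory
for step in range(3):
    placeholder_loss = 1.0 / (step + 1)             # stand-in value for illustration
    writer.add_scalar('loss/train', placeholder_loss, global_step=step)
writer.close()                                      # flush events to disk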
Example #3
    def construct_data(self):
        self.data_dir = os.path.join(self.args.data_dir, self.args.dataset)
        self.dataset_name = utils.construct_dir(
            prefix=self.args.dataset, args=self.args, create_dataset_name=True)
        dataset_file_name = os.path.join(self.data_dir, self.dataset_name)

        # Build the dataset once, cache it with pickle, and exit; later runs load the cache.
        if not os.path.exists(dataset_file_name):
            if self.args.dataset == 'imdb':
                self.text_data = IMDBData(args=self.args)
            elif self.args.dataset == 'agnews':
                self.text_data = AGNewsData(args=self.args)
            else:
                print('Cannot recognize {}'.format(self.args.dataset))
                raise NotImplementedError

            with open(dataset_file_name, 'wb') as datasetFile:
                p.dump(self.text_data, datasetFile)
            print('dataset created and saved to {}, exiting ...'.format(dataset_file_name))
            exit(0)
        else:
            with open(dataset_file_name, 'rb') as datasetFile:
                self.text_data = p.load(datasetFile)
            print('dataset loaded from {}'.format(dataset_file_name))

        # Construct the train/val/test splits and wrap them in DataLoaders.
        self.text_data.construct_dataset(max_steps=self.args.max_steps)

        self.train_loader = DataLoader(dataset=self.text_data.training_set_all,
                                       num_workers=self.args.num_worker,
                                       batch_size=self.args.batch_size,
                                       shuffle=True)
        self.val_loader = DataLoader(dataset=self.text_data.val_set,
                                     num_workers=self.args.num_worker,
                                     batch_size=self.args.batch_size,
                                     shuffle=False)
        self.test_loader = DataLoader(dataset=self.text_data.test_set,
                                      num_workers=self.args.num_worker,
                                      batch_size=self.args.batch_size,
                                      shuffle=False)
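The p used for serialization in these snippets is presumably the pickle module imported under a short alias. The same build-once-then-cache idea can be written as a small standalone helper; build_fn here is a placeholder for whatever constructs the dataset (IMDBData, AGNewsData, and so on):

import os
import pickle as p

def load_or_build(cache_path, build_fn):
    # Reuse the cached object when present; otherwise build it and write the cache.
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            return p.load(f)
    obj = build_fn()
    with open(cache_path, 'wb') as f:
        p.dump(obj, f)
    return obj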
Example #4
    def construct_out_dir(self):
        self.model_dir = utils.construct_dir(prefix=self.args.model_dir,
                                             args=self.args,
                                             create_dataset_name=False)
        self.out_dir = utils.construct_dir(prefix=self.args.test_dir,
                                           args=self.args,
                                           create_dataset_name=False)
        self.result_file = self.model_dir.split('/')[-1]
        self.out_path = os.path.join(self.args.result_dir, self.result_file)

        if not os.path.exists(self.args.result_dir):
            os.makedirs(self.args.result_dir)

        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)

        if not os.path.exists(self.out_dir):
            os.makedirs(self.out_dir)
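Several of these snippets repeat the check-then-create pair (if not os.path.exists(...): os.makedirs(...)). On Python 3.2+ the exist_ok flag gives the same effect in one call and avoids the race between the check and the creation:

import os

os.makedirs('results/models', exist_ok=True)  # the path shown here is just an example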
Example #5
    def construct_out_dir(self):
        self.result_dir = utils.construct_dir(prefix=self.args.result_dir,
                                              args=self.args)
        self.out_image_dir = os.path.join(self.result_dir, 'images')
        self.model_dir = os.path.join(self.result_dir, 'models')
        self.out_path = os.path.join(self.result_dir, 'result.txt')

        if not os.path.exists(self.out_image_dir):
            os.makedirs(self.out_image_dir)

        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
Example #6
    def construct_data(self):
        self.data_dir = os.path.join(self.args.data_dir, self.args.dataset)
        self.dataset_name = utils.construct_dir(prefix=self.args.dataset,
                                                args=self.args,
                                                create_dataset_name=True)
        dataset_file_name = os.path.join(self.data_dir, self.dataset_name)

        if not os.path.exists(dataset_file_name):
            if self.args.dataset == 'rotten':
                self.text_data = RottenData(args=self.args)
            elif self.args.dataset == 'congress':
                self.text_data = CongressData(args=self.args)
            else:
                print('Cannot recognize {}'.format(self.args.dataset))
                raise NotImplementedError

            with open(dataset_file_name, 'wb') as datasetFile:
                p.dump(self.text_data, datasetFile)
            print('dataset created and saved to {}, exiting ...'.format(
                dataset_file_name))
            exit(0)
        else:
            with open(dataset_file_name, 'rb') as datasetFile:
                self.text_data = p.load(datasetFile)
            print('dataset loaded from {}'.format(dataset_file_name))

        self.text_data.construct_dataset(elmo=self.args.elmo)

        self.train_loader = DataLoader(dataset=self.text_data.training_dataset,
                                       num_workers=1,
                                       batch_size=self.args.batch_size,
                                       shuffle=False)
        self.val_loader = DataLoader(dataset=self.text_data.val_dataset,
                                     num_workers=1,
                                     batch_size=self.args.test_batch_size,
                                     shuffle=False)
        self.test_loader = DataLoader(dataset=self.text_data.test_dataset,
                                      num_workers=1,
                                      batch_size=self.args.test_batch_size,
                                      shuffle=False)
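Once the loaders above exist, training code consumes them batch by batch. Below is a minimal, self-contained sketch of that iteration; it uses a stand-in TensorDataset, since the project's text datasets are not shown here, and a real loop would run the model and backpropagate instead of printing shapes.

import torch
from torch.utils.data import DataLoader, TensorDataset

# Stand-in data: 8 "sentences" of 5 token ids each, with binary labels.
inputs = torch.randint(0, 100, (8, 5))
labels = torch.randint(0, 2, (8,))
loader = DataLoader(TensorDataset(inputs, labels), batch_size=4, shuffle=False)

for batch_inputs, batch_labels in loader:
    print(batch_inputs.shape, batch_labels.shape)  # model forward/backward would go here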
Example #7
    def compute_dist(self):
        # Pairwise distances between vocabulary words in the counter-fitted
        # embedding space, cached with pickle for later reuse.
        self.vocab = self.text_data.get_vocab()
        dists = compute_dist(vocab=self.vocab, word2id=self.text_data.word2id,
                             id2word=self.text_data.id2word,
                             embedding_file='counter-fitted-vectors.txt')
        dist_name = utils.construct_dir(
            prefix=self.args.dataset, args=self.args, create_dist_name=True)
        with open(dist_name, 'wb') as file:
            p.dump(dists, file)
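compute_dist is another project-local helper that is not shown on this page, and counter-fitted-vectors.txt refers to the published counter-fitted word vectors often used for synonym-substitution work. The sketch below is only an assumption about what such a helper might compute (a pairwise Euclidean distance matrix over in-vocabulary embeddings); the function name, the 300-dimension default, and the file-format handling are all illustrative.

import numpy as np

def compute_dist_sketch(vocab, embedding_file, dim=300):
    # vocab: list of words; embedding_file: lines of "word v1 v2 ... v_dim".
    word_index = {w: i for i, w in enumerate(vocab)}
    vectors = np.zeros((len(vocab), dim), dtype=np.float32)
    with open(embedding_file, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            if parts[0] in word_index:
                vectors[word_index[parts[0]]] = np.asarray(parts[1:], dtype=np.float32)
    # Pairwise Euclidean distances via the squared-norm expansion.
    sq = (vectors ** 2).sum(axis=1)
    gram = vectors @ vectors.T
    return np.sqrt(np.maximum(sq[:, None] + sq[None, :] - 2.0 * gram, 0.0))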