def work_dir(self, test_name):
    """Return the per-test working directory, creating it on first use.

    Args:
        test_name (str): name of the subdirectory to place under
            ``self.testset_work_dir``.

    Returns:
        str: path of the (now existing) directory.
    """
    path = os.path.join(self.testset_work_dir, test_name)
    util.maybe_create_dir(path)
    return path
def work_dir(self, test_name):
    """Resolve (and lazily create) the directory this test writes into.

    Args:
        test_name (str): test name; becomes the directory name under
            the testset working directory.

    Returns:
        str: absolute or relative path, depending on
            ``self.testset_work_dir``.
    """
    target = os.path.join(self.testset_work_dir, test_name)
    util.maybe_create_dir(target)
    return target
def work_dir(self, test_name):
    """Make sure a scratch directory named *test_name* exists.

    The directory lives under ``self.testset_work_dir``; creation is a
    no-op when it is already present.

    Args:
        test_name (str): directory name to ensure.

    Returns:
        str: path of the directory.
    """
    test_dir = os.path.join(self.testset_work_dir, test_name)
    util.maybe_create_dir(test_dir)
    return test_dir
def work_dir(self, test_name):
    """Build the path for *test_name* under the testset work dir and
    guarantee it exists before returning it.

    Args:
        test_name (str): leaf directory name.

    Returns:
        str: joined directory path.
    """
    out = os.path.join(self.testset_work_dir, test_name)
    util.maybe_create_dir(out)
    return out
# NOTE(review): fragment of a larger training routine — the enclosing `def`
# (which binds max_epoch, gpu, batch_size, the file paths, etc.) is above
# this chunk and not visible here.

# Echo the effective configuration so runs are reproducible from the log.
print( "            max_epoch  : {}".format(max_epoch) )
print( "            max_line_length : {}".format(max_line_length) )
print( "            train_file : {}".format(train_file) )
print( "            valid_file : {}".format(valid_file) )
print( "            test_file  : {}".format(test_file) )
print( "            vocab_file : {}".format(vocab_file) )
print( "            save_dir   : {}".format(save_dir) )
print( "            encoding   : {}".format(encoding) )

# Select the array backend: CuPy when a GPU id (>= 0) was given, NumPy
# otherwise. Presumably `cuda` is chainer.cuda — TODO confirm against the
# file's imports.
if gpu >= 0:
    cuda.get_device(gpu).use()
xp = np if gpu < 0 else cuda.cupy

# Ensure the checkpoint/output directory exists before training starts.
maybe_create_dir(save_dir)

# Load the pre-built vocabulary pack (binary format written elsewhere).
print(' load vocab from {} ...'.format(vocab_file) )
vocab = Vocab().load_pack(open(vocab_file, 'rb'), encoding=encoding)
vocab_size = len(vocab)
print('  vocab size: {}'.format(vocab_size) )

# Minibatch feeders over the packed corpora. `train_head_batches` caps at
# 1000 lines — a cheap subset, presumably used for quick train-set
# evaluation; verify against the training loop below this view.
train_batches = MinibatchFeeder(open(train_file, 'rb'), batch_size=batch_size,
                                max_line_length=max_line_length)
train_head_batches = MinibatchFeeder(open(train_file, 'rb'), batch_size=batch_size,
                                     max_line_length=max_line_length,
                                     max_num_lines=1000)
valid_batches = MinibatchFeeder(open(valid_file, 'rb'), batch_size=batch_size,
                                max_line_length=max_line_length)

print( "train      : {} lines".format(train_batches.num_epoch_lines) )
print( "train(head): {} lines".format(train_head_batches.num_epoch_lines) )
# NOTE(review): tail of parse_args() — its `def` and the earlier
# add_argument calls are above this chunk and not visible here.
    parser.add_argument('--encoding', '-e', type=str, required=False,
                        default='utf-8', help='encoding')
    result = parser.parse_args()
    # Sanity check only; assert is stripped under -O, so a negative
    # --min-count would pass silently there.
    assert( result.min_count >= 0 )
    return result


# --- module-level script body -------------------------------------------
args = parse_args(sys.argv)

# Unpack CLI options into module-level names used below.
train_file = args.train_file
valid_file = args.valid_file
test_file = args.test_file
save_dir = args.save_dir
min_count = args.min_count
max_vocab = args.max_vocab
encoding = args.encoding

# Packed train output goes next to the input file, so ensure that
# directory exists (vocab goes to save_dir instead).
maybe_create_dir(os.path.dirname(train_file))

# Count tokens in the training corpus and keep those meeting min_count,
# truncated to max_vocab entries.
print( "collect vocabulary ..." )
vocab = create_vocab([codecs.open(train_file, encoding=encoding)],
                     min_count=min_count, max_vocab=max_vocab)

# Persist the vocabulary in the project's binary "pack" format.
vocab_pack_file = os.path.join(save_dir, 'vocab.pack')
print( "save vocab to {} ...".format(vocab_pack_file) )
vocab.save_pack(open(vocab_pack_file, 'wb'))

# Re-encode the training text as id sequences and write train.pack
# alongside the input file.
train_pack_file = os.path.join(os.path.dirname(train_file), 'train.pack')
print( "encode and pack train file {} to {} ...".format(train_file, train_pack_file) )
encode_and_pack(vocab, fin=codecs.open(train_file, 'r', encoding=encoding),
                fout=open(train_pack_file, 'wb'))

# Validation packing follows; its body continues past this view.
if valid_file: