def testGeneratorInputFnWithXAsNonGeneratorFunction(self):
    """Passing a NumPy array as `x` must raise a TypeError.

    `x` is a plain array here rather than a generator function; the test
    expects the message 'x must be generator function'.
    """
    x = np.arange(32, 36)
    with self.cached_session():
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
        # which was removed from unittest in Python 3.12.
        with self.assertRaisesRegex(TypeError, 'x must be generator function'):
            failing_input_fn = generator_io.generator_input_fn(
                x, batch_size=2, shuffle=False, num_epochs=1)
            failing_input_fn()
def testGeneratorInputFnWithMismatchinGeneratorKeys(self):
    """Feeds two dicts whose key sets differ ('b' in the first, 'c' in the second).

    Expects the first `session.run` to raise OutOfRangeError and the
    coordinator shutdown to raise a KeyError describing the key mismatch.
    """

    def generator():
        # Same three-key layout twice; only the middle key name changes.
        for index, middle_key in enumerate(('b', 'c')):
            yield {
                'a': np.ones(1) * index,
                middle_key: np.ones(1) * index + 32,
                'label': np.ones(1) * index - 32,
            }

    with self.test_session() as sess:
        build_features = generator_io.generator_input_fn(
            generator,
            target_key=None,
            batch_size=2,
            shuffle=False,
            num_epochs=1)
        features = build_features()

        coord = coordinator.Coordinator()
        runner_threads = queue_runner_impl.start_queue_runners(
            sess, coord=coord)

        with self.assertRaises(errors.OutOfRangeError):
            sess.run([features])

        with self.assertRaisesRegex(KeyError,
                                    'key mismatch between dicts emitted'
                                    ' by GenFunExpected'):
            coord.request_stop()
            coord.join(runner_threads)
def testGeneratorInputFnWithDifferentDimensionsOfFeatures(self):
    """Each key carries a differently sized 2-D array; checks the first batch."""

    def generator():
        for index in range(100):
            yield {
                'a': np.ones((10, 10)) * index,
                'b': np.ones((5, 5)) * index + 32,
                'label': np.ones((3, 3)) * index - 32,
            }

    def first_two_rows(side):
        # Generator rows 0 and 1 before any offset: all zeros, then all ones.
        return np.stack((np.zeros((side, side)), np.ones((side, side))))

    with self.cached_session() as sess:
        input_fn = generator_io.generator_input_fn(
            generator,
            target_key='label',
            batch_size=2,
            shuffle=False,
            num_epochs=1)
        features, target = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

        got_features, got_target = sess.run([features, target])
        self.assertAllEqual(got_features['a'], first_two_rows(10))
        self.assertAllEqual(got_features['b'], first_two_rows(5) + 32)
        self.assertAllEqual(got_target, first_two_rows(3) - 32)

        coord.request_stop()
        coord.join(threads)
def testGeneratorInputFnWithBatchLargerthanData(self):
    """Batch size 4 over a two-row generator.

    The expected batch contains the two rows twice ([0, 1, 0, 1] for 'a');
    a second run is expected to raise OutOfRangeError.
    """

    def generator():
        for index in range(2):
            yield {
                'a': np.ones(1) * index,
                'b': np.ones(1) * index + 32,
                'label': np.ones(1) * index - 32,
            }

    # (key, expected values) in the order the original assertions ran.
    expectations = (
        ('a', [0, 1, 0, 1]),
        ('b', [32, 33, 32, 33]),
        ('label', [-32, -31, -32, -31]),
    )

    with self.cached_session() as sess:
        input_fn = generator_io.generator_input_fn(
            generator,
            target_key=None,
            batch_size=4,
            shuffle=False,
            num_epochs=1)
        features = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

        batch = sess.run(features)
        for key, values in expectations:
            self.assertAllEqual(batch[key], np.asarray(values).reshape(-1, 1))

        with self.assertRaises(errors.OutOfRangeError):
            sess.run([features])

        coord.request_stop()
        coord.join(threads)
def predictor():
    """Predict the word in './audio/recording.wav' and report it.

    Returns:
        True if any predicted label is 'stop' (after printing a message and
        removing everything under ./audio/), otherwise False.
    """
    input_fn = generator_input_fn(
        x=test_data_generator(['./audio/recording.wav']),
        batch_size=hparams.batch_size,
        shuffle=False,
        num_epochs=1,
        queue_capacity=10 * hparams.batch_size,
        num_threads=1)

    estimator = create_model(config=run_config, hparams=hparams)

    # The last batch will contain padding; keying by sample name drops the
    # duplicates.
    predictions = {
        out['sample'].decode(): id2name[out['label']]
        for out in tqdm(estimator.predict(input_fn=input_fn))
    }

    os.system('clear')
    for word in predictions.values():
        if word == 'stop':
            print('Exitting...')
            os.system('rm -rf ./audio/*')
            return True
        print('You said : {}\n'.format(word))
    return False
def testGeneratorSingleInputFn(self):
    """Single-key generator: checks the first batch of 'a', then exhaustion.

    After the checked batch one more run succeeds, and the run after that is
    expected to raise OutOfRangeError.
    """

    def generator():
        for index in range(2):
            yield {'a': np.ones(1) * index}

    with self.test_session() as sess:
        input_fn = generator_io.generator_input_fn(
            generator,
            target_key=None,
            batch_size=2,
            shuffle=False,
            num_epochs=1)
        features = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

        first_batch = sess.run([features])[0]
        self.assertAllEqual(first_batch['a'],
                            np.asarray([0, 1]).reshape(-1, 1))

        sess.run([features])
        with self.assertRaises(errors.OutOfRangeError):
            sess.run([features])

        coord.request_stop()
        coord.join(threads)
def testGeneratorInputFnWithDifferentDimensionsOfFeatures(self):
    """Checks the first batch when 'a', 'b' and 'label' have different shapes."""

    def generator():
        for index in range(100):
            yield {
                'a': np.ones((10, 10)) * index,
                'b': np.ones((5, 5)) * index + 32,
                'label': np.ones((3, 3)) * index - 32,
            }

    def expected_batch(side, offset):
        # Rows 0 and 1 of the generator (zeros, then ones) plus the key's offset.
        pair = np.stack((np.zeros((side, side)), np.ones((side, side))))
        return pair + offset

    with self.cached_session() as sess:
        input_fn = generator_io.generator_input_fn(
            generator,
            target_key='label',
            batch_size=2,
            shuffle=False,
            num_epochs=1)
        features, target = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

        res = sess.run([features, target])
        self.assertAllEqual(res[0]['a'], expected_batch(10, 0))
        self.assertAllEqual(res[0]['b'], expected_batch(5, 32))
        self.assertAllEqual(res[1], expected_batch(3, -32))

        coord.request_stop()
        coord.join(threads)
def testGeneratorInputFnLabelDict(self):
    """A list `target_key` produces a target dict keyed by 'label' and 'label2'."""

    def generator():
        for index in range(2):
            yield {
                'a': np.ones(1) * index,
                'b': np.ones(1) * index + 32,
                'label': np.ones(1) * index - 32,
                'label2': np.ones(1) * index - 64,
            }

    def column(values):
        # Expected values as a (-1, 1) column, as in the original assertions.
        return np.asarray(values).reshape(-1, 1)

    with self.test_session() as sess:
        input_fn = generator_io.generator_input_fn(
            generator,
            target_key=['label', 'label2'],
            batch_size=2,
            shuffle=False,
            num_epochs=1)
        features, target = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

        got_features, got_target = sess.run([features, target])
        self.assertAllEqual(got_features['a'], column([0, 1]))
        self.assertAllEqual(got_features['b'], column([32, 33]))
        self.assertAllEqual(got_target['label'], column([-32, -31]))
        self.assertAllEqual(got_target['label2'], column([-64, -63]))

        sess.run([features])
        with self.assertRaises(errors.OutOfRangeError):
            sess.run([features, target])

        coord.request_stop()
        coord.join(threads)
def testGeneratorInputFn(self):
    """Basic case: features 'a'/'b' and a single 'label' target, batch of 2."""

    def generator():
        for index in range(2):
            yield {
                'a': np.ones(1) * index,
                'b': np.ones(1) * index + 32,
                'label': np.ones(1) * index - 32,
            }

    def column(values):
        return np.asarray(values).reshape(-1, 1)

    with self.cached_session() as sess:
        input_fn = generator_io.generator_input_fn(
            generator,
            target_key='label',
            batch_size=2,
            shuffle=False,
            num_epochs=1)
        features, target = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

        got_features, got_target = sess.run([features, target])
        self.assertAllEqual(got_features['a'], column([0, 1]))
        self.assertAllEqual(got_features['b'], column([32, 33]))
        self.assertAllEqual(got_target, column([-32, -31]))

        sess.run([features])
        with self.assertRaises(errors.OutOfRangeError):
            sess.run([features, target])

        coord.request_stop()
        coord.join(threads)
def testGeneratorInputFnWithXAsNonGeneratorFunction(self):
    """A NumPy array passed as `x` must be rejected with a TypeError.

    The expected message is 'x must be generator function'.
    """
    x = np.arange(32, 36)
    with self.test_session():
        # assertRaisesRegexp is a deprecated alias (removed in Python 3.12);
        # use assertRaisesRegex.
        with self.assertRaisesRegex(TypeError, 'x must be generator function'):
            failing_input_fn = generator_io.generator_input_fn(
                x, batch_size=2, shuffle=False, num_epochs=1)
            failing_input_fn()
def testGeneratorInputFnWithMismatchinGeneratorKeys(self):
    """The generator's second dict uses key 'c' where the first used 'b'.

    Expects OutOfRangeError from the first run and a KeyError about the key
    mismatch when the coordinator is stopped and joined.
    """

    def make_row(index, middle_key):
        return {
            'a': np.ones(1) * index,
            middle_key: np.ones(1) * index + 32,
            'label': np.ones(1) * index - 32,
        }

    def generator():
        yield make_row(0, 'b')
        yield make_row(1, 'c')

    with self.cached_session() as sess:
        input_fn = generator_io.generator_input_fn(
            generator,
            target_key=None,
            batch_size=2,
            shuffle=False,
            num_epochs=1)
        features = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

        with self.assertRaises(errors.OutOfRangeError):
            sess.run([features])

        with self.assertRaisesRegex(KeyError,
                                    'key mismatch between dicts emitted'
                                    ' by GenFunExpected'):
            coord.request_stop()
            coord.join(threads)
def testGeneratorInputFnWithXAsNonGeneratorYieldingDicts(self):
    """A generator that yields an array instead of a dict must raise TypeError.

    The expected message matches the regex 'x\\(\\) must yield dict'.
    """

    def generator():
        yield np.arange(32, 36)

    with self.test_session():
        # Raw string: '\(' is an invalid escape in a normal literal.
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
        with self.assertRaisesRegex(TypeError, r"x\(\) must yield dict"):
            failing_input_fn = generator_io.generator_input_fn(
                generator, batch_size=2, shuffle=False, num_epochs=1)
            failing_input_fn()
def testGeneratorInputFnWithXAsNonGenerator(self):
    """A function that returns an array (not a generator) must raise TypeError.

    The expected message matches the regex 'x\\(\\) must be generator'.
    """

    def generator():
        return np.arange(32, 36)

    with self.test_session():
        # Raw string: '\(' is an invalid escape in a normal literal.
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
        with self.assertRaisesRegex(TypeError, r'x\(\) must be generator'):
            failing_input_fn = generator_io.generator_input_fn(
                generator, batch_size=2, shuffle=False, num_epochs=1)
            failing_input_fn()
def testGeneratorInputFnWithXAsNonGenerator(self):
    """A function that returns an array (not a generator) must raise TypeError.

    The expected message matches the regex 'x\\(\\) must be generator'.
    """

    def generator():
        return np.arange(32, 36)

    with self.cached_session():
        # assertRaisesRegexp is a deprecated alias (removed in Python 3.12);
        # use assertRaisesRegex.
        with self.assertRaisesRegex(TypeError, r'x\(\) must be generator'):
            failing_input_fn = generator_io.generator_input_fn(
                generator, batch_size=2, shuffle=False, num_epochs=1)
            failing_input_fn()
def main(args):
    """Run prediction with a saved model and write `submission.csv`.

    Reads `hparams.json` and `vocab.json` from `args.modeldir`, predicts every
    file listed in `sample_submission.csv` under `args.datadir`, and writes a
    `fname,label` CSV (one row per distinct file basename) into
    `args.modeldir`.
    """
    # Restore the saved configuration and the vocabulary.
    hparams_path = os.path.join(args.modeldir, 'hparams.json')
    with open(hparams_path, 'r') as fin:
        params = json.load(fin)
    vocab_path = os.path.join(args.modeldir, 'vocab.json')
    with open(vocab_path, 'r') as fin:
        raw_vocab = json.load(fin)
    # Keys are converted back to int ids.
    vocab = {int(key): word for key, word in raw_vocab.items()}
    hparams = tf.contrib.training.HParams(**params)

    # The same workaround as before, needed on some machines.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    run_config = tf.estimator.RunConfig(
        model_dir=args.modeldir, session_config=session_config)

    # Create the model.
    model = base.create_model(config=run_config, hparams=hparams)

    # Prepare the test data from sample_submission.
    df = pd.read_csv(os.path.join(args.datadir, 'sample_submission.csv'))
    df.label = 0
    audio_dir = os.path.join(args.datadir, 'audio_test')
    df.fname = [os.path.join(audio_dir, name) for name in df.fname.values]

    # predict works one example at a time anyway, so drop batching; this way
    # whole recordings can be processed.
    # NB: it is worth checking that pad_value works correctly.
    test_input_fn = generator_input_fn(
        x=utils.fast_datagenerator(df, params, 'test'),
        batch_size=1,
        shuffle=False,
        num_epochs=1,
        queue_capacity=hparams.batch_size,
        num_threads=1,
        pad_value=0.0,
    )
    predictions = model.predict(input_fn=test_input_fn)  # this is an iterator

    # The rest is a bit dirty: refactor it, add fold information if needed.
    submission = dict()
    for output in tqdm(predictions):
        fname = os.path.basename(output['fname'].decode())
        # Up to three labels may be predicted per recording.
        submission[fname] = " ".join(vocab[i] for i in output['top3'])

    rows = ["{},{}\n".format(fname, pred) for fname, pred in submission.items()]
    with open(os.path.join(args.modeldir, 'submission.csv'), 'w') as fout:
        fout.write('fname,label\n')
        fout.writelines(rows)
def get_test_input_function(self):
    """Build the input function over the dataset's test files.

    Returns:
        The result of `generator_input_fn`, configured with no target key,
        batch size 1, no shuffling, one epoch and one thread.
    """
    test_files = self._dataset.get_test_files()
    return generator_input_fn(
        x=self.data_generator(test_files, 1, 'test'),
        target_key=None,
        batch_size=1,
        shuffle=False,
        num_epochs=1,
        queue_capacity=3 * self._batch_size + 10,
        num_threads=1,
    )
def get_val_input_fn(self):
    """Build the input function over the dataset's validation files.

    Returns:
        The result of `generator_input_fn`, configured with no target key,
        `self._batch_size` batches, shuffling, one epoch and one thread.
    """
    source = self.data_generator(
        self._dataset.get_val_files(), self._batch_size, 'val')
    options = dict(
        target_key=None,
        batch_size=self._batch_size,
        shuffle=True,
        num_epochs=1,
        queue_capacity=3 * self._batch_size + 10,
        num_threads=1,
    )
    return generator_input_fn(x=source, **options)
def get_val_input_fn(self):
    """Build the validation input function from the audio preprocessor's files.

    Returns:
        The result of `generator_input_fn`, with `self._feature_type.TARGET`
        as target key, shuffling, one epoch and one thread.
    """
    val_files = self._audio_preprocessor.get_val_files()
    return generator_input_fn(
        x=self.data_generator(val_files, None, 'val'),
        target_key=self._feature_type.TARGET,
        batch_size=self._batch_size,
        shuffle=True,
        num_epochs=1,
        queue_capacity=3 * self._batch_size + 10,
        num_threads=1,
    )
def _get_test_input_function(self):
    """Build the input function over `self.test_wav_files_path`.

    Returns:
        The result of `generator_input_fn`, configured with no target key,
        batch size 1, no shuffling, one epoch, queue capacity 2000 and one
        thread.
    """
    source = self.data_generator(self.test_wav_files_path, None, 1, 'test')
    return generator_input_fn(
        x=source,
        target_key=None,
        batch_size=1,
        shuffle=False,
        num_epochs=1,
        queue_capacity=2000,
        num_threads=1,
    )
def testGeneratorInputFNWithTargetLabelListNotString(self):
    """A `target_key` list containing a non-string entry must raise TypeError.

    The expected message is 'target_key must be str or Container of str'.
    """

    def generator():
        for index in range(2):
            yield {
                'a': np.ones((10, 10)) * index,
                'b': np.ones((5, 5)) * index + 32,
                'label': np.ones((3, 3)) * index - 32,
            }

    # The second entry is an array, not a str.
    y = ["label", np.arange(10)]
    with self.test_session():
        # assertRaisesRegexp is a deprecated alias (removed in Python 3.12);
        # use assertRaisesRegex.
        with self.assertRaisesRegex(TypeError, 'target_key must be str or'
                                    ' Container of str'):
            failing_input_fn = generator_io.generator_input_fn(
                generator, target_key=y, batch_size=2, shuffle=False,
                num_epochs=1)
            failing_input_fn()
def get_train_input_fn(self):
    """Build the training input function from the audio preprocessor's files.

    Returns:
        The result of `generator_input_fn`, with `self._feature_type.TARGET`
        as target key, shuffling, `self._num_epochs` epochs and one thread.
    """
    train_files = self._audio_preprocessor.get_train_files()
    return generator_input_fn(
        x=self.data_generator(train_files, None, 'train'),
        # You could leave target_key in features, so labels in model_handler
        # will be empty.
        target_key=self._feature_type.TARGET,
        batch_size=self._batch_size,
        shuffle=True,
        num_epochs=self._num_epochs,
        queue_capacity=3 * self._batch_size + 10,
        num_threads=1,
    )
def testGeneratorInputFNWithTargetLabelNotInDict(self):
    """A `target_key` entry missing from the yielded dict must raise KeyError.

    The expected message is 'target_key not in yielded dict'.
    """

    def generator():
        for index in range(2):
            yield {
                'a': np.ones((10, 10)) * index,
                'b': np.ones((5, 5)) * index + 32,
                'label': np.ones((3, 3)) * index - 32,
            }

    # "target" is not one of the keys the generator yields.
    y = ["label", "target"]
    with self.test_session():
        # assertRaisesRegexp is a deprecated alias (removed in Python 3.12);
        # use assertRaisesRegex.
        with self.assertRaisesRegex(KeyError, 'target_key not in yielded dict'):
            failing_input_fn = generator_io.generator_input_fn(
                generator, target_key=y, batch_size=2, shuffle=False,
                num_epochs=1)
            failing_input_fn()
def get_train_input_fn(self):
    """Build the input function over the dataset's training files.

    Returns:
        The result of `generator_input_fn`, configured with no target key,
        `self._batch_size` batches, shuffling, one epoch and one thread.
    """
    source = self.data_generator(
        self._dataset.get_train_files(), self._batch_size, 'train')
    options = dict(
        # You could leave target_key in features, so labels in model_handler
        # will be empty.
        target_key=None,
        batch_size=self._batch_size,
        shuffle=True,
        num_epochs=1,
        queue_capacity=3 * self._batch_size + 10,
        num_threads=1,
    )
    return generator_input_fn(x=source, **options)
def get_data_generator(code, hparams):
    """Build the training and validation input functions for `code`.

    Args:
        code: passed to `data_generator_train` and `data_generator_val`.
        hparams: object providing `batch_size`.

    Returns:
        A `(train_input_fn, val_input_fn)` tuple. Both use target key
        'target', shuffling and `num_epochs=None`; training uses 10 threads,
        validation uses 1.
    """

    def build(source, threads):
        # Everything except the data source and thread count is shared.
        return generator_input_fn(
            x=source,
            # You could leave target_key in features, so labels in
            # model_handler will be empty.
            target_key='target',
            batch_size=hparams.batch_size,
            shuffle=True,
            num_epochs=None,
            queue_capacity=3 * hparams.batch_size + 10,
            num_threads=threads,
        )

    train_input_fn = build(data_generator_train(code), 10)
    val_input_fn = build(data_generator_val(code), 1)
    return train_input_fn, val_input_fn
def _get_train_input_fn(self):
    """Build the input function over the training wav files and labels.

    Returns:
        The result of `generator_input_fn`, configured with no target key,
        `self._batch_size` batches, shuffling, one epoch, queue capacity 2000
        and three threads.
    """
    wav_paths = self.train_wav_files_path
    wav_labels = self.train_labels
    source = self.data_generator(
        wav_paths, wav_labels, self._batch_size, 'train')
    return generator_input_fn(
        x=source,
        # You could leave target_key in features, so labels in model_handler
        # will be empty.
        target_key=None,
        batch_size=self._batch_size,
        shuffle=True,
        num_epochs=1,
        queue_capacity=2000,
        num_threads=3,
    )
def _get_val_input_fn(self):
    """Build the input function over the validation wav files and labels.

    Returns:
        The result of `generator_input_fn`, configured with no target key,
        `self._batch_size` batches, shuffling, one epoch, queue capacity 2000
        and three threads.
    """
    wav_paths = self.val_wav_files_path
    wav_labels = self.val_labels
    source = self.data_generator(
        wav_paths, wav_labels, self._batch_size, 'val')
    return generator_input_fn(
        x=source,
        target_key=None,
        batch_size=self._batch_size,
        shuffle=True,
        num_epochs=1,
        queue_capacity=2000,
        num_threads=3,
    )
def get_val_input_fn(self):
    """Build the validation input function on top of `self.get_data`.

    Returns:
        The result of `generator_input_fn`, with `self._feature_type.TARGET`
        as target key, shuffling, one epoch and one thread.
    """
    source = self.get_data(
        candidates=self._audio_preprocessor.get_val_files(),
        how_many=-1,
        offset=0,
        audio_sampling_settings=self._audio_sampling_settings,
        background_frequency=BACKGROUND_FREQUENCY,
        background_volume_range=BACKGROUND_VOLUME,
        time_shift=TIME_SHIFT_MS,
        mode="validation",
        sess=self._tf_sess)
    return generator_input_fn(
        x=source,
        target_key=self._feature_type.TARGET,
        batch_size=self._batch_size,
        shuffle=True,
        num_epochs=1,
        queue_capacity=3 * self._batch_size + 10,
        num_threads=1,
    )
def get_train_input_fn(self):
    """Build the training input function on top of `self.get_data`.

    Returns:
        The result of `generator_input_fn`, with `self._feature_type.TARGET`
        as target key, shuffling, `self._num_epochs` epochs and one thread.
    """
    source = self.get_data(
        candidates=self._audio_preprocessor.get_train_files(),
        how_many=-1,
        offset=0,
        audio_sampling_settings=self._audio_sampling_settings,
        background_frequency=BACKGROUND_FREQUENCY,
        background_volume_range=BACKGROUND_VOLUME,
        time_shift=TIME_SHIFT_MS,
        mode="training",
        sess=self._tf_sess)
    return generator_input_fn(
        x=source,
        # You could leave target_key in features, so labels in model_handler
        # will be empty.
        target_key=self._feature_type.TARGET,
        batch_size=self._batch_size,
        shuffle=True,
        num_epochs=self._num_epochs,
        queue_capacity=3 * self._batch_size + 10,
        num_threads=1,
    )
def get_test_data_generator(code, hparams):
    """Build the test input function for one `code`.

    Args:
        code: interpolated into the HDF file names
            ('../hdf_201709/<code>.h5' and '../hdf_201710/<code>.h5').
        hparams: object providing `batch_size`.

    Returns:
        The result of `generator_input_fn` over the row generator below
        (no shuffling, one epoch, one thread).
    """
    lookback = 120  # rows of history placed in each sample's `data`
    horizon = 60    # rows ahead used to derive `target` and `future`

    def test_data_generator(code):
        def generator():
            scaler = StandardScaler()
            print("loading val data set")
            # The scaler is fitted on the first 1000 rows of the 201709 file
            # and then applied to windows of the 201710 file.
            df = pd.read_hdf('../hdf_201709/%s.h5' % code,
                             'table').reset_index()
            scaler.fit(df.iloc[0:1000, 2:])
            df = pd.read_hdf('../hdf_201710/%s.h5' % code,
                             'table').reset_index()

            # Row i + horizon must exist. The original fixed bound of 100000
            # is kept as a cap, but the loop now ends cleanly at the end of
            # the frame instead of raising from an out-of-range lookup.
            stop = min(100000, len(df) - horizon)
            for i in range(lookback, stop):
                # Characters 11..15 of the timestamp column, compared as
                # strings (presumably 'HH:MM' - confirm against the data).
                cur_time = df.loc[i, '시간'][11:16]
                if cur_time > "15:20" or cur_time < "09:02":
                    continue

                # Read both values once; they are reused for the target and
                # for the yielded `cur` / `future` fields.
                cur = df.iloc[i, 2]
                future = df.iloc[i + horizon, 2]
                if cur < future:
                    pred = 1
                elif cur > future:
                    pred = -1
                else:
                    pred = 0

                window = np.array(df.iloc[i - lookback:i, 2:])
                yield dict(
                    target=np.int32(pred),
                    data=scaler.transform(window).reshape(
                        lookback, 67, 1).astype(np.float32),
                    cur=cur,
                    future=future,
                    buy=df.iloc[i, 27],
                    sell=df.iloc[i, 29])

        return generator

    test_input_fn = generator_input_fn(
        x=test_data_generator(code),
        batch_size=hparams.batch_size,
        shuffle=False,
        num_epochs=1,
        queue_capacity=10 * hparams.batch_size,
        num_threads=1)
    return test_input_fn
) hparams = tf.contrib.training.HParams(**params) OUTDIR = './model-3' directory = os.path.join(OUTDIR, 'eval') if not os.path.exists(directory): os.makedirs(directory) #os.makedirs(os.path.join(OUTDIR, 'eval')) model_dir = OUTDIR run_config = tf.contrib.learn.RunConfig(model_dir=model_dir) from tensorflow.contrib.learn.python.learn.learn_io.generator_io import generator_input_fn test_input_fn = generator_input_fn( x=test_data_generator(paths), batch_size=hparams.batch_size, shuffle=False, num_epochs=1, queue_capacity=10 * hparams.batch_size, num_threads=1, ) # it's a magic function :) #from tensorflow.contrib.learn.python.learn.learn_io.generator_io import generator_input_fn model = create_model(config=run_config, hparams=hparams) it = model.predict(input_fn=test_input_fn) to_predict = 'yes no up down left right on off stop go'.split() #print(it.get_shape().as_list()) with open(os.path.join(model_dir, 'submission1.csv'), 'w') as fout: fout.write('fname,label\n') submission = dict()
fname=path, desired_samples=16000, fg_vol=1, bg_data=[], bg_vol=0, clip_min=-1.0, clip_max=1.0, time_shift_samples=0, ) result[FINGERPRINT_KEY]=getMfcc(getTransformedAudioLocal(**audio_options)) yield result test_input_fn = generator_input_fn( x=test_data_generator, batch_size=TEST_BATCH_SIZE, shuffle=False, num_epochs=1, queue_capacity= 10 * TEST_BATCH_SIZE, num_threads=1, ) model = create_estimator( config=RunConfig(model_dir=model_dir), hparams=HParams(**params), ) it = model.predict(input_fn=test_input_fn) submission = dict() for t in tqdm(it): fname, label = t['fname'].decode(), id2name[t['label']] submission[fname] = label
model_dir = OUTDIR
run_config = tf.contrib.learn.RunConfig(model_dir=model_dir)

# Build the training and validation input functions.
from tensorflow.contrib.learn.python.learn.learn_io.generator_io import generator_input_fn

# Both input functions share every setting except the data subset and mode.
# You could leave target_key in features, so labels in model_handler will be
# empty.
train_input_fn, val_input_fn = (
    generator_input_fn(
        x=data_generator(subset, hparams, mode),
        target_key='target',
        batch_size=hparams.batch_size,
        shuffle=True,
        num_epochs=None,
        queue_capacity=3 * hparams.batch_size + 10,
        num_threads=1,
    )
    for subset, mode in ((trainset, 'train'), (valset, 'val'))
)
yield result except Exception as err: print(err, label_id, uid, fname) return generator ##========================================================= ## Actual computations start here ##========================================================= train_meta_list, val_meta_list = get_metadata_lists(DATADIR) train_input_fn = generator_input_fn( x=data_generator_fn(train_meta_list, 'train'), target_key=TARGET_KEY, batch_size=BATCH_SIZE, shuffle=True, num_epochs=None, queue_capacity=3 * BATCH_SIZE + 10, num_threads=1, ) val_input_fn = generator_input_fn( x=data_generator_fn(val_meta_list), target_key=TARGET_KEY, batch_size=BATCH_SIZE, shuffle=True, num_epochs=None, queue_capacity=3 * BATCH_SIZE + 10, num_threads=1, )
def main(_):
    """Predict labels for the prediction set and write `submission.csv`.

    Builds the model settings from FLAGS, feeds batches from
    `prediction_input_data.AudioProcessor` through an Estimator in predict
    mode, and writes sorted `fname,label` rows to
    `FLAGS.result_dir/submission.csv`.

    Args:
        _: unused.
    """
    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    # Begin by making sure we have the training data we need. If you already
    # have training data of your own, use `--data_url= ` on the command line
    # to avoid downloading.
    model_settings = models.prepare_model_settings(
        len(new_input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)

    POSSIBLE_LABELS = new_input_data.prepare_words_list(
        FLAGS.wanted_words.split(','))

    params = dict(
        # seed=2018,
        # batch_size=FLAGS.batch_size,
        # keep_prob=0.5,
        # learning_rate=0.0002,
        # clip_gradients=15.0,
        # use_batch_norm=True,
        # num_classes=len(POSSIBLE_LABELS)
    )
    hparams = tf.contrib.training.HParams(**params)
    run_config = tf.contrib.learn.RunConfig()
    run_config = run_config.replace(model_dir=FLAGS.train_dir)

    audio_processor2 = prediction_input_data.AudioProcessor(
        FLAGS.data_dir, FLAGS.prediction_data_dir, model_settings)
    set_size = audio_processor2.set_size()
    print('prediction data size: ', set_size)

    def prediction_data_generator():
        """Return a generator function yielding one dict per data batch."""

        def generator():
            for i in xrange(0, set_size, FLAGS.prediction_batch_size):
                # Pull the audio samples we'll use for testing.
                fname, fingerprints = \
                    audio_processor2.get_data(FLAGS.prediction_batch_size, i,
                                              model_settings, 0.0, 0.0, 0,
                                              sess)
                yield dict(fname=fname, fingerprint_input=fingerprints)

        return generator

    test_input_fn = generator_input_fn(
        x=prediction_data_generator(),
        batch_size=FLAGS.prediction_batch_size,
        queue_capacity=10 * FLAGS.prediction_batch_size,
    )

    def model_fn(features, labels, mode, params):
        """Model function for Estimator."""
        logits = models.create_model(
            tf.cast(features['fingerprint_input'], tf.float32),
            model_settings,
            FLAGS.model_architecture,
            is_training=False)

        # Provide an estimator spec for `ModeKeys.PREDICT`.
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                # NOTE(review): 'fname' is cast to float32 here but later
                # read with `.decode()` - confirm which type is intended.
                'fname': tf.cast(features['fname'], tf.float32),
                'label': tf.argmax(logits, axis=-1)
            }
            specs = dict(mode=mode, predictions=predictions)
            return tf.estimator.EstimatorSpec(**specs)

    def get_estimator(config, hparams):
        """Return the model as a Tensorflow Estimator object.

        Args:
            config (RunConfig): Configuration for Estimator run.
            hparams (HParams): hyperparameters (currently not passed on).
        """
        return tf.estimator.Estimator(model_fn=model_fn,
                                      config=config
                                      # params=hparams,
                                      )

    estimator = get_estimator(config=run_config, hparams=hparams)

    it = estimator.predict(input_fn=test_input_fn)

    id2name = {i: name for i, name in enumerate(POSSIBLE_LABELS)}

    # last batch will contain padding, so remove duplicates
    submission = dict()
    for t in tqdm(it):
        fname, label = t['fname'].decode(), id2name[t['label']]
        # print("fname >>> : ", fname, ", ", "label >>> : ", label)
        submission[fname] = label

    # make submission.csv
    # The context manager closes the file even if writing a row raises.
    with open(os.path.join(FLAGS.result_dir, 'submission.csv'), 'w',
              encoding='utf-8', newline='') as fout:
        writer = csv.writer(fout)
        writer.writerow(['fname', 'label'])
        for key in sorted(submission.keys()):
            writer.writerow([key, submission[key]])
def main(args):
    """Train and evaluate a model on `train.csv` under `args.datadir`.

    Creates `args.outdir/eval`, splits the data 67/33 into train and
    validation, saves `hparams.json` and `vocab.json` to `args.outdir`, then
    runs `tf.estimator.train_and_evaluate`.
    """
    # Just create the two folders for the current experiment: exp and
    # exp/eval.
    try:
        os.makedirs(os.path.join(args.outdir, 'eval'))
    except OSError:
        pass

    df = pd.read_csv(os.path.join(args.datadir, 'train.csv'))
    labels = sorted(set(df.label.values))
    label2id = {name: idx for idx, name in enumerate(labels)}
    id2label = {idx: name for name, idx in label2id.items()}
    df['label'] = [label2id[name] for name in df.label.values]
    df['fname'] = [
        os.path.join(args.datadir, 'audio_train', name)
        for name in df.fname.values
    ]

    # TODO: work out the input data format and tune the split procedure.
    # You could add folds, balance the classes, or split by the
    # manual-labelling flag.
    positions = np.arange(len(df))
    idx_train, idx_val = train_test_split(
        positions, test_size=0.33, random_state=2018, shuffle=True)
    df_train = df.iloc[idx_train]
    df_val = df.iloc[idx_val]

    params = {'num_classes': len(labels)}
    params.update(**args.__dict__)
    hparams = tf.contrib.training.HParams(**params)

    # Save two files: the model parameters (useful once the parameters
    # determine the network structure) ...
    with open(os.path.join(args.outdir, 'hparams.json'), 'w') as fout:
        json.dump(params, fout, indent=2)
    # ... and the label dictionary for the reverse mapping.
    with open(os.path.join(args.outdir, 'vocab.json'), 'w') as fout:
        json.dump(id2label, fout, indent=2)

    # A small odd workaround, not needed on every machine. On some it helps
    # with a strange CUDNN error.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    run_config = tf.estimator.RunConfig(
        model_dir=args.outdir, session_config=session_config)

    # Marked as deprecated, but no simple replacement has been found yet.
    train_input_fn = generator_input_fn(
        x=utils.fast_datagenerator(df_train, hparams, 'train'),
        target_key='target',
        batch_size=hparams.batch_size,
        shuffle=True,
        num_epochs=10,
        queue_capacity=3 * hparams.batch_size,
        num_threads=1,
    )
    val_input_fn = generator_input_fn(
        x=utils.fast_datagenerator(df_val, hparams, 'val'),
        target_key='target',
        batch_size=hparams.batch_size,
        shuffle=False,
        num_epochs=None,
        queue_capacity=3 * hparams.batch_size,
        num_threads=1,
    )

    # Create the model and train it.
    estimator = base.create_model(config=run_config, hparams=hparams)
    tf.estimator.train_and_evaluate(
        estimator,
        tf.estimator.TrainSpec(input_fn=train_input_fn),
        tf.estimator.EvalSpec(input_fn=val_input_fn))