def run_internal_eval(eval_model, eval_sess, model_dir, hps, summary_writer,
                      use_test_set=False):
    """Compute internal evaluation (perplexity) for both dev / test."""
    with eval_model.graph.as_default():
        loaded_eval_model, global_step = model_helper.create_or_load_model(
            eval_model.model, model_dir, eval_sess, "eval")

    dev_src_files, dev_tgt_files = data.get_files(hps.data_dir, hps.dev_prefix)
    dev_eval_iterator_feed_dict = {
        eval_model.src_file_placeholder: dev_src_files,
        eval_model.tgt_file_placeholder: dev_tgt_files
    }
    dev_ppl = _internal_eval(loaded_eval_model, global_step, eval_sess,
                             eval_model.iterator, dev_eval_iterator_feed_dict,
                             summary_writer, "dev")

    test_ppl = None
    if use_test_set and hps.test_prefix:
        test_src_files, test_tgt_files = data.get_files(
            hps.data_dir, hps.test_prefix)
        test_eval_iterator_feed_dict = {
            eval_model.src_file_placeholder: test_src_files,
            eval_model.tgt_file_placeholder: test_tgt_files
        }
        test_ppl = _internal_eval(loaded_eval_model, global_step, eval_sess,
                                  eval_model.iterator,
                                  test_eval_iterator_feed_dict,
                                  summary_writer, "test")
    return dev_ppl, test_ppl
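The function above delegates to a `_internal_eval` helper that is not shown here. A minimal sketch of what it might look like, assuming TF 1.x-style summaries and a `model_helper.compute_perplexity(model, sess, name)` utility (both are assumptions, not confirmed by this code):

import tensorflow as tf

def _internal_eval(model, global_step, sess, iterator, iterator_feed_dict,
                   summary_writer, label):
    # re-initialize the iterator on the requested split, then score it
    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)
    ppl = model_helper.compute_perplexity(model, sess, label)  # assumed helper
    # log perplexity under e.g. "dev_ppl" / "test_ppl"
    summary = tf.Summary(value=[
        tf.Summary.Value(tag="%s_ppl" % label, simple_value=ppl)])
    summary_writer.add_summary(summary, global_step)
    return ppl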
def get_tag_files(overwrite, score_mocs):
    """Get .tags files."""
    tag_files = get_files('tags', 'tags')
    if overwrite or score_mocs:
        return tag_files
    else:
        # skip tag files that already have a matching score file
        score_files_bn = get_basenames(get_files('scores', 'scores'))
        return [f for f in tag_files if not has_score_file(f, score_files_bn)]
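Several snippets here call a two-argument `get_files(subdir, ext)` helper that is not shown. A plausible minimal sketch, assuming the files simply live in a flat directory named after their kind (the layout and the sorting are assumptions):

import os

def get_files(subdir, ext):
    # hypothetical helper: all files under subdir with the given extension
    return sorted(os.path.join(subdir, f)
                  for f in os.listdir(subdir)
                  if f.endswith('.' + ext))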
def test():
    test_data_lists = get_files(config.test_data, "test")
    test_datagen = data_generator(test_data_lists, "test",
                                  augument=False).create_train()
    model = get_model()
    model.load_weights(config.weights_path)
    # steps must be an integer; round up so every test sample is covered
    steps = int(np.ceil(len(test_data_lists) / 16))
    predicted_labels = np.argmax(
        model.predict_generator(test_datagen, steps=steps), axis=-1)
    print(predicted_labels)
def get_dataset():
    print(FLAGS.files_checkpoint)
    print(FLAGS.train_subjects)
    dataset_files = get_files(checkpoint=FLAGS.files_checkpoint,
                              train_subjects=FLAGS.train_subjects)
    dataset = get_objects(dataset_files)
    add_extra_dims(dataset)
    return dataset
def main(filelist):
    filenames = data.get_files(filelist)
    mean, std = compute_mean_std(filenames)
    print('channel mean')
    print(mean)
    print('channel std')
    print(std)
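`compute_mean_std` is not shown. A hypothetical sketch of what it could compute, assuming a `data.load_image(f)` loader (an assumption; the snippets only show `data.load_augment`) that returns a float array shaped (3, H, W):

import numpy as np

def compute_mean_std(filenames):
    # accumulate per-channel sums and squared sums over all pixels
    s, s2, n = np.zeros(3), np.zeros(3), 0
    for f in filenames:
        img = data.load_image(f).reshape(3, -1)  # hypothetical loader
        s += img.sum(axis=1)
        s2 += (img ** 2).sum(axis=1)
        n += img.shape[1]
    mean = s / n
    std = np.sqrt(s2 / n - mean ** 2)
    return mean, std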
def load_scores(n=20):
    """Load the scores for the top n hashtags from the .scores files.

    n ... number of top/bottom ranked hashtags to include.
    """
    # result: span -> day -> tag -> score,
    # e.g. 30 -> 2014-05-09 -> obamacare -> 10.5
    result = defaultdict(lambda: {})
    for score_file in get_files('scores', 'scores'):
        print(score_file)
        day, timespan = parse_filename(score_file)
        scores = read_scores(score_file, n)
        result[timespan][day] = scores
    return result
def train():
    model = get_model()
    train_generator, validation_generator = get_files("train")
    model.fit_generator(
        train_generator,
        steps_per_epoch=config.nb_train_samples // config.batch_size,
        epochs=config.epochs,
        validation_data=validation_generator,
        validation_steps=config.nb_validation_samples // config.batch_size)
    model.save_weights('./checkpoints/first_try.h5')
def main():
    args = docopt(__doc__)
    today = datetime.now()
    timespans = parse_timespans(args['-t'])
    files = get_files('jsons', 'json')
    ndays = int(args['-d'])
    handles = set(twitter_handle_to_party().keys())
    for day in range(ndays):
        ids_seen = set()
        tags_list = {timespan: defaultdict(Counter) for timespan in timespans}
        thisday = today - timedelta(days=day)
        print('pretending today is %s' % thisday.strftime('%Y-%m-%d'))
        for f in files:
            parse(f, tags_list, timespans, thisday, ids_seen, handles)
        outfiles = open_outfiles(thisday, timespans)
        for outfile, span in zip(outfiles, timespans):
            write_tags(outfile, tags_list[span])
def main(filelist):
    filenames = data.get_files(filelist)
    bs = 1000
    # slice into batches of at most bs files; stepping by bs avoids an
    # empty trailing batch when len(filenames) is a multiple of bs
    batches = [filenames[i:i + bs] for i in range(0, len(filenames), bs)]
    Us, evs = [], []
    for batch in batches:
        images = np.array([data.load_augment(f, 256, 256) for f in batch])
        # flatten to one RGB sample per pixel and take the channel covariance
        X = images.transpose(0, 2, 3, 1).reshape(-1, 3)
        cov = np.dot(X.T, X) / X.shape[0]
        U, S, V = np.linalg.svd(cov)
        ev = np.sqrt(S)
        Us.append(U)
        evs.append(ev)
    print('U')
    print(np.mean(Us, axis=0))
    print('eigenvalues')
    print(np.mean(evs, axis=0))
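The U matrix and eigenvalues printed above are the ingredients for Krizhevsky-style PCA color augmentation. A short sketch of how they might be applied at training time (the function name and the 0.1 noise scale are assumptions):

import numpy as np

def pca_lighting(image, U, ev, sigma=0.1):
    # image: (H, W, 3) float array; jitter each channel along the
    # principal components of the channel covariance
    alpha = np.random.normal(0.0, sigma, size=3)
    noise = U.dot(alpha * ev)  # per-channel offset, shape (3,)
    return image + noise[None, None, :]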
def train(callbacks):
    # 1. compile the model
    print("--> Compiling the model...")
    model = get_model()
    # load the raw training data
    raw_train_data_lists = get_files(config.train_data, "train")
    # split the raw training data into train and validation sets
    train_data_lists, val_data_lists = train_test_split(
        raw_train_data_lists, test_size=0.3)
    train_datagen = data_generator(train_data_lists, "train",
                                   augument=True).create_train()
    # the validation data is augmented too: if the model predicts well on
    # augmented data, it should be more robust
    val_datagen = data_generator(val_data_lists, "val",
                                 augument=True).create_train()
    history = model.fit_generator(
        train_datagen,
        validation_data=val_datagen,
        epochs=config.epochs,
        verbose=1,
        callbacks=callbacks,
        steps_per_epoch=len(train_data_lists) // config.batch_size,
        validation_steps=len(val_data_lists) // config.batch_size)
    # tail of an (elided) enclosing training loop:
    print('Iteration %d, variance = %.4f' % (iteration, variance_value))
    if not condition:
        break

def get_param_count():
    all_params = 0
    for variable in tf.trainable_variables():
        variable_params = np.prod(variable.get_shape().as_list())
        all_params += variable_params
        print('%s: %d' % (variable.name, variable_params))
    print('TOTAL: %d' % all_params)

files = {phase: data.get_files(phase) for phase in data._PHASES}
net = get_net()
stat = get_stat()
sess = tf.Session()
with sess.as_default():
    model = util.Model(net['global_step'])
    saver = tf.train.Saver(tf.all_variables(), max_to_keep=32,
                           keep_checkpoint_every_n_hours=2)
    summary_writer = tf.train.SummaryWriter(SUMMARY_PATH)
    checkpoint = tf.train.get_checkpoint_state(SAVE_DIR)
    if checkpoint:
        print('[ Model restored from %s ]' % checkpoint.model_checkpoint_path)
        saver.restore(sess, checkpoint.model_checkpoint_path)
    else:
        print('[ Model initialized ]')
        # tail of an (elided) variable-creation call:
        collections=[tf.GraphKeys.VARIABLES, NET_VARIABLES])

if FLAGS.command == 'train':
    learning_rate = tf.train.exponential_decay(
        learning_rate=learning_rate,
        global_step=global_step,
        decay_steps=FLAGS.lr_half_per,
        decay_rate=0.5,
        staircase=True)

''' Pipeline '''
data.cache_train_files(directory=FLAGS.train_dir)

if FLAGS.command == 'train' or FLAGS.command == 'none':
    train_files = data.get_files(data.DEV,
                                 num_pipelines=FLAGS.num_train_pipelines,
                                 subsample_ratio=FLAGS.subsample_ratio,
                                 directory=FLAGS.train_dir)
    train_values = data.get_train_values(train_files,
                                         batch_size=FLAGS.batch_size)

if FLAGS.command == 'train':
    test_files = data.get_files(data.VAL,
                                num_pipelines=FLAGS.num_test_pipelines,
                                directory=FLAGS.train_dir)
elif FLAGS.command == 'test':
    test_files = data.get_files(data.TEST,
                                num_pipelines=FLAGS.num_test_pipelines,
                                directory=FLAGS.test_dir)

test_values = data.get_test_values(test_files,
                                   batch_size=FLAGS.batch_size,
                                   num_test_crops=FLAGS.num_test_crops)
# each example expands into num_test_crops crops, so the number of
# distinct examples per batch shrinks accordingly
test_batch_size = FLAGS.batch_size // FLAGS.num_test_crops
test_iteration = sum(map(len, test_files)) // test_batch_size