Example #1
def run_internal_eval(eval_model,
                      eval_sess,
                      model_dir,
                      hps,
                      summary_writer,
                      use_test_set=False):
    """Compute internal evaluation (perplexity) for both dev / test."""
    with eval_model.graph.as_default():
        loaded_eval_model, global_step = model_helper.create_or_load_model(
            eval_model.model, model_dir, eval_sess, "eval")
    dev_src_files, dev_tgt_files = data.get_files(hps.data_dir,
                                                  hps.dev_prefix)
    dev_eval_iterator_feed_dict = {
        eval_model.src_file_placeholder: dev_src_files,
        eval_model.tgt_file_placeholder: dev_tgt_files
    }
    dev_ppl = _internal_eval(loaded_eval_model, global_step, eval_sess,
                             eval_model.iterator, dev_eval_iterator_feed_dict,
                             summary_writer, "dev")
    test_ppl = None
    if use_test_set and hps.test_prefix:

        test_src_files, test_tgt_files = data.get_files(
            hps.data_dir, hps.test_prefix)
        test_eval_iterator_feed_dict = {
            eval_model.src_file_placeholder: test_src_files,
            eval_model.tgt_file_placeholder: test_tgt_files
        }

        test_ppl = _internal_eval(loaded_eval_model, global_step, eval_sess,
                                  eval_model.iterator,
                                  test_eval_iterator_feed_dict, summary_writer,
                                  "test")

    return dev_ppl, test_ppl
Example #2
def get_tag_files(overwrite, score_mocs):
    """ Get .tags files. """
    tag_files = get_files('tags', 'tags')
    if overwrite or score_mocs:
        return tag_files
    else:
        score_files_bn = get_basenames(get_files('scores', 'scores'))
        return [f for f in tag_files if not has_score_file(f, score_files_bn)]
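
The get_files helper itself is not shown in this listing. A minimal sketch of what a call such as get_files('tags', 'tags') plausibly does, assuming the first argument is a directory and the second a file extension; both the directory layout and the signature are assumptions, not the project's actual code:

import glob
import os

def get_files(subdir, extension):
    """Hypothetical helper: return sorted paths of all *.<extension> files under <subdir>."""
    return sorted(glob.glob(os.path.join(subdir, '*.%s' % extension)))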
Example #3
def get_tag_files(overwrite, score_mocs):
    """ Get .tags files. """
    tag_files = get_files('tags', 'tags')
    if overwrite or score_mocs:
        return tag_files
    else:
        score_files_bn = get_basenames(get_files('scores', 'scores'))
        return [f for f in tag_files if not has_score_file(f, score_files_bn)]
Example #4
def test():
    test_data_lists = get_files(config.test_data, "test")
    test_datagen = data_generator(test_data_lists, "test", augument=False).create_train()
    model = get_model()
    model.load_weights(config.weights_path)
    # steps must be an integer number of batches; round up so no samples are dropped
    steps = int(np.ceil(len(test_data_lists) / 16))
    predicted_labels = np.argmax(model.predict_generator(test_datagen, steps=steps), axis=-1)
    print(predicted_labels)
Example #5
def get_dataset():
    print(FLAGS.files_checkpoint)
    print(FLAGS.train_subjects)
    dataset_files = get_files(checkpoint=FLAGS.files_checkpoint,
                              train_subjects=FLAGS.train_subjects)
    dataset = get_objects(dataset_files)
    add_extra_dims(dataset)
    return dataset
Example #6
def main(filelist):
    filenames = data.get_files(filelist)
    mean,std = compute_mean_std(filenames)

    print('channel mean')
    print(mean)
    print('channel std')
    print(std)
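
compute_mean_std is not part of the listing either. A minimal sketch of the per-channel statistics it presumably accumulates, assuming data.load_augment (as used in Example #12 below) returns a channel-first float array of shape (3, H, W); the helper name and return layout are assumptions:

import numpy as np

def compute_mean_std(filenames, size=256):
    # Accumulate per-channel sums and squared sums over the whole file list,
    # then derive mean and std from E[x^2] - E[x]^2.
    sums = np.zeros(3)
    sq_sums = np.zeros(3)
    n_pixels = 0
    for f in filenames:
        img = data.load_augment(f, size, size)   # assumed shape (3, H, W)
        pixels = img.reshape(3, -1)
        sums += pixels.sum(axis=1)
        sq_sums += (pixels ** 2).sum(axis=1)
        n_pixels += pixels.shape[1]
    mean = sums / n_pixels
    std = np.sqrt(sq_sums / n_pixels - mean ** 2)
    return mean, std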
Example #7
def load_scores(n=20):
    """ Create a csv file containing the ranks for the top n hashtags, according to the .scores files.
    n ... number of top/bottom ranked hashtags to include.
    """
    # ranks: spans -> day -> tag -> score. E.g., 30 -> 2014-05-09 -> obamacare -> 10.5
    result = defaultdict(lambda: {})
    for score_file in get_files('scores', 'scores'):
        print(score_file)
        day, timespan = parse_filename(score_file)
        scores = read_scores(score_file, n)
        result[timespan][day] = scores
    return result
Example #8
def train():
    model = get_model()
    train_generator,validation_generator = get_files("train")

    model.fit_generator(
        train_generator,
        steps_per_epoch=config.nb_train_samples // config.batch_size,
        epochs=config.epochs,
        validation_data=validation_generator,
        validation_steps=config.nb_validation_samples // config.batch_size)

    model.save_weights('./checkpoints/first_try.h5')
Example #9
def load_scores(n=20):
    """ Create a csv file containing the ranks for the top n hashtags, according to the .scores files.
    n ... number of top/bottom ranked hashtags to include.
    """
    # ranks: spans -> day -> tag -> score. E.g., 30 -> 2014-05-09 -> obamacare -> 10.5
    result = defaultdict(lambda: {})
    for score_file in get_files('scores', 'scores'):
        print(score_file)
        day, timespan = parse_filename(score_file)
        scores = read_scores(score_file, n)
        result[timespan][day] = scores
    return result
Example #10
def main():
    args = docopt(__doc__)
    today = datetime.now()
    timespans = parse_timespans(args['-t'])
    files = get_files('jsons', 'json')
    ndays = int(args['-d'])
    handles = set(twitter_handle_to_party().keys())
    for day in range(ndays):
        ids_seen = set()
        tags_list = dict([(timespan, defaultdict(lambda: Counter())) for timespan in timespans])
        thisday = today - timedelta(days=day)
        print('pretending today is %s' % thisday.strftime('%Y-%m-%d'))
        for f in files:
            parse(f, tags_list, timespans, thisday, ids_seen, handles)
        outfiles = open_outfiles(thisday, timespans)
        for outfile, span in zip(outfiles, timespans):
            write_tags(outfile, tags_list[span])
Example #11
def main():
    args = docopt(__doc__)
    today = datetime.now()
    timespans = parse_timespans(args['-t'])
    files = get_files('jsons', 'json')
    ndays = int(args['-d'])
    handles = set(twitter_handle_to_party().keys())
    for day in range(ndays):
        ids_seen = set()
        tags_list = dict([(timespan, defaultdict(lambda: Counter()))
                          for timespan in timespans])
        thisday = today - timedelta(days=day)
        print('pretending today is %s' % thisday.strftime('%Y-%m-%d'))
        for f in files:
            parse(f, tags_list, timespans, thisday, ids_seen, handles)
        outfiles = open_outfiles(thisday, timespans)
        for outfile, span in zip(outfiles, timespans):
            write_tags(outfile, tags_list[span])
Example #12
def main(filelist):
    filenames = data.get_files(filelist) 

    bs = 1000
    # ceiling division so the last partial batch is kept and no empty batch is produced
    n_batches = (len(filenames) + bs - 1) // bs
    batches = [filenames[i * bs:(i + 1) * bs] for i in range(n_batches)]

    Us, evs = [], []
    for batch in batches:
        images = np.array([data.load_augment(f, 256, 256) for f in batch])
        X = images.transpose(0, 2, 3, 1).reshape(-1, 3)
        cov = np.dot(X.T, X) / X.shape[0]
        U, S, V = np.linalg.svd(cov)
        ev = np.sqrt(S)
        Us.append(U)
        evs.append(ev)
    print('U')
    print(np.mean(Us, axis=0))
    print('eigenvalues')
    print(np.mean(evs, axis=0))
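
The averaged U and eigenvalues printed above are the quantities typically used for PCA ("fancy PCA") colour augmentation. A minimal sketch of how they could be applied to an image, assuming a channel-last float image of shape (H, W, 3) and that U / ev hold the printed values; this application step is not part of the original listing:

import numpy as np

def pca_color_augment(image, U, ev, sigma=0.1):
    # Draw a random weight per principal colour component and shift every
    # pixel by the resulting per-channel offset.
    alpha = np.random.normal(0.0, sigma, size=3)
    shift = U.dot(alpha * ev)                    # one offset per channel
    return image + shift[np.newaxis, np.newaxis, :]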
Example #13
def train(callbacks):
    # 1. compile
    print("--> Compiling the model...")
    model = get_model()
    # load raw train data
    raw_train_data_lists = get_files(config.train_data, "train")
    # split raw train data into train and val
    train_data_lists, val_data_lists = train_test_split(raw_train_data_lists, test_size=0.3)
    # train data
    train_datagen = data_generator(train_data_lists, "train", augument=True).create_train()
    # val data (also augmented: if the model predicts well on augmented data, it should be more robust)
    val_datagen = data_generator(val_data_lists, "val", augument=True).create_train()
    history = model.fit_generator(
        train_datagen,
        validation_data=val_datagen,
        epochs=config.epochs,
        verbose=1,
        callbacks=callbacks,
        steps_per_epoch=len(train_data_lists) // config.batch_size,
        validation_steps=len(val_data_lists) // config.batch_size
    )
Example #14
File: bbqnet.py  Project: stmharry/BBQNet
            print('Iteration %d, variance = %.4f' % (iteration, variance_value))
            if not condition:
                break


def get_param_count():
    all_params = 0
    for variable in tf.trainable_variables():
        variable_params = np.prod(variable.get_shape().as_list())
        all_params += variable_params

        print('%s: %d' % (variable.name, variable_params))
    print('TOTAL: %d' % all_params)


files = {phase: data.get_files(phase) for phase in data._PHASES}
net = get_net()
stat = get_stat()

sess = tf.Session()
with sess.as_default():
    model = util.Model(net['global_step'])
    saver = tf.train.Saver(tf.all_variables(), max_to_keep=32, keep_checkpoint_every_n_hours=2)
    summary_writer = tf.train.SummaryWriter(SUMMARY_PATH)

    checkpoint = tf.train.get_checkpoint_state(SAVE_DIR)
    if checkpoint:
        print('[ Model restored from %s ]' % checkpoint.model_checkpoint_path)
        saver.restore(sess, checkpoint.model_checkpoint_path)
    else:
        print('[ Model initialized ]')
Example #15
    collections=[tf.GraphKeys.VARIABLES, NET_VARIABLES])

if FLAGS.command == 'train':
    learning_rate = tf.train.exponential_decay(
        learning_rate=learning_rate,
        global_step=global_step,
        decay_steps=FLAGS.lr_half_per,
        decay_rate=0.5,
        staircase=True)

''' Pipeline
'''
data.cache_train_files(directory=FLAGS.train_dir)

if FLAGS.command == 'train' or FLAGS.command == 'none':
    train_files = data.get_files(data.DEV, num_pipelines=FLAGS.num_train_pipelines, subsample_ratio=FLAGS.subsample_ratio, directory=FLAGS.train_dir)
    train_values = data.get_train_values(
        train_files,
        batch_size=FLAGS.batch_size)

if FLAGS.command == 'train':
    test_files = data.get_files(data.VAL, num_pipelines=FLAGS.num_test_pipelines, directory=FLAGS.train_dir)
elif FLAGS.command == 'test':
    test_files = data.get_files(data.TEST, num_pipelines=FLAGS.num_test_pipelines, directory=FLAGS.test_dir)
test_values = data.get_test_values(
    test_files,
    batch_size=FLAGS.batch_size,
    num_test_crops=FLAGS.num_test_crops)
test_batch_size = FLAGS.batch_size / FLAGS.num_test_crops
test_iteration = sum(map(len, test_files)) / test_batch_size