Exemple #1
0
def test_single_dialog_existing_file_one_update(user_input,
                                                prepare_config_file,
                                                monkeypatch):
    """Check that a single existing key can be updated through the dialog.

    The scripted answers are, in order: "Y" to overwrite the file,
    ``user_input`` as the answer to whether the value should be updated,
    the replacement value, and "Y" to save the file.

    * ``user_input`` equal to "Y" or "" -> the key must hold the new value
    * any other ``user_input`` -> the file content must be unchanged
    """
    config, config_text = prepare_config_file
    new_value = "new_value"
    answers = ["Y", user_input, new_value, "Y"]
    setup_input(monkeypatch, answers)

    assert config_dialog(config, ["update_node"])

    # Declined update: the file on disk has to match the fixture text
    if user_input not in ["Y", ""]:
        assert config.read_text() == config_text
        return

    stored_value = parse(config.read_text())["update_node"]
    assert stored_value == new_value, "Existing value was not updated"
Exemple #2
0
def test_single_dialog_new_config(tmp_path, monkeypatch, save_agree):
    """Run config_dialog once against a config file that does not exist yet.

    ``save_agree`` selects the answer given to the final save prompt:
    truthy -> "Y", ``None`` -> "" (empty answer), anything else -> "N".
    """
    path: Path = tmp_path / "config.toml"
    # The file is expected on disk for a truthy or a None save_agree
    file_created = save_agree or save_agree is None

    # Translate the parameter into what is typed at the save prompt
    if save_agree is None:
        save_answer = ""
    elif save_agree:
        save_answer = "Y"
    else:
        save_answer = "N"

    # One scripted answer per attribute passed to config_dialog, then the save answer.
    # Note: click validates confirmation prompts itself, so garbage input is not tested here
    setup_input(
        monkeypatch,
        ["author name", "https://confluence.local", "page title", save_answer],
    )

    # The file should exist when and only when the function returned true
    dialog_result = config_dialog(
        Path(path), ["author", "auth.url", "pages.page1.page_title"])
    assert dialog_result == path.exists()

    if file_created:
        expected_text = ('author = "author name"\n'
                         '\n'
                         '[auth]\n'
                         'url = "https://confluence.local"\n'
                         '\n'
                         '[pages]\n'
                         '[pages.page1]\n'
                         'page_title = "page title"\n')
        assert path.read_text() == expected_text
Exemple #3
0
def test_incremental_config_dialog(prepare_config_file, monkeypatch, capsys):
    """Exercise config_dialog with ``incremental=True``.

    Asking for "new_path" must succeed without printing the
    "Current content" banner; asking for "update_node" must raise
    (presumably because that key is already in the fixture file - confirm
    against the fixture).
    """
    config, _ = prepare_config_file
    setup_input(monkeypatch, ["new_value", "Y"])

    dialog_result = config_dialog(config,
                                  attributes=["new_path"],
                                  incremental=True)
    assert dialog_result

    printed = capsys.readouterr().out
    assert ("Current content" not in printed
            ), "incremental should suppress printing file altogether"

    with pytest.raises(Exception):
        config_dialog(config, attributes=["update_node"], incremental=True)
Exemple #4
0
def test_sensitive_parameter_file_mode(tmp_path, monkeypatch, capsys):
    """A hidden-input parameter must yield an owner-only file and a warning.

    The dialog is run with one ``DialogParameter`` created with
    ``hide_input=True``; afterwards the output must mention the sensitive
    parameter and the file mode must be exactly 0o100600.
    """
    monkeypatch.setattr("getpass.getpass", lambda x: "password")
    setup_input(monkeypatch, ("password", "Y"))

    config_path: Path = tmp_path / "config.toml"
    hidden_parameter = DialogParameter("hidden param", hide_input=True)
    assert config_dialog(config_path, attributes=[hidden_parameter])

    printed = capsys.readouterr().out
    assert "sensitive parameter was passed" in printed

    # 0o100600 == 33152: regular-file bit plus read/write for the owner only
    assert config_path.stat().st_mode == 0o100600
Exemple #5
0
def test_dialog_converts_filename_to_path(tmp_path, monkeypatch):
    """The config file may be handed to the dialog as a Path or as a str."""
    path_as_path: Path = tmp_path / "config_path.toml"
    path_as_string: str = str(tmp_path / "config_string.toml")

    for tested_type in (path_as_path, path_as_string):
        # Same scripted answers as the new-config test: three values, then "Y" to save
        setup_input(
            monkeypatch,
            ["author name", "https://confluence.local", "page title", "Y"],
        )
        dialog_result = config_dialog(
            tested_type, ["author", "auth.url", "pages.page1.page_title"])
        assert dialog_result
        # A truthy result has to come with a file on disk
        assert Path(tested_type).exists()
Exemple #6
0
def test_prompt_function_confirm_function(monkeypatch, filter_mode):
    """Check StateConfig prompt/confirm helpers with and without filter mode.

    With ``filter_mode`` left as ``None`` the default must be falsy. When
    filter mode is on, both helpers must raise; otherwise they return the
    scripted "Y" answers.
    """
    state = StateConfig()
    if filter_mode is None:
        assert not state.filter_mode
    else:
        state.filter_mode = filter_mode
    setup_input(monkeypatch, cli_input=("Y", "Y"))

    if not state.filter_mode:
        answer = state.prompt_function("Prompt?")
        assert answer == "Y"
        confirmed = state.confirm_function("Prompt?")
        assert confirmed
        return

    # Filter mode: any attempt to interact with the user must fail
    with pytest.raises(Exception):
        state.prompt_function("Prompt?")
    with pytest.raises(Exception):
        state.confirm_function("Prompt?")
Exemple #7
0
def test_single_dialog_existing_file_base(mode, user_agrees_to_overwrite_file,
                                          prepare_config_file, monkeypatch):
    """Run the dialog on an existing file, with and without overwrite consent.

    * consent given: "new_node" is asked for and must end up in the file
    * consent refused: the dialog returns ``None`` and the file is unchanged
    """
    config, config_text = prepare_config_file
    new_value = "new_value"

    if not user_agrees_to_overwrite_file:
        # Single answer: refuse to overwrite the existing file
        setup_input(monkeypatch, ["N"])
        assert config_dialog(config, ["update"]) is None
        assert config.read_text() == config_text
        return

    # Answers: overwrite the file, the value itself, save the file
    setup_input(monkeypatch, ["Y", new_value, "Y"])
    node_path = "new_node"
    assert config_dialog(config, [node_path])
    stored_value = parse(config.read_text())[node_path]
    assert stored_value == new_value, "Value was not set to the new one"
Exemple #8
0
def run_training( cfg ):
    """Pull batches through the input pipeline and report the elapsed time.

    Builds the inputs, model and metrics for ``cfg`` in a fresh graph, starts
    the data prefetch threads, then fetches the input/target/index tensors
    ``inputs['max_steps']`` times (or until the coordinator asks to stop).
    The fetched values are discarded: this only exercises data loading.

    Args:
        cfg: configuration dict; ``cfg['num_epochs']`` is read for the
            timing report, the rest is passed on to the setup helpers.
    """
    # set up logging
    tf.logging.set_verbosity( tf.logging.INFO )

    with tf.Graph().as_default():
        # create ops and placeholders
        inputs = utils.setup_input( cfg, is_training=False, use_filename_queue=True )
        RuntimeDeterminedEnviromentVars.load_dynamic_variables( inputs, cfg )
        RuntimeDeterminedEnviromentVars.populate_registered_variables()

        # build the model and the metric ops on top of the inputs
        model = setup_model( inputs, cfg, is_training=False )
        names_to_values, names_to_updates = setup_metrics( inputs, model, cfg )

        started_at = time.time()
        utils.print_start_info( cfg, inputs[ 'max_steps' ], is_training=False )

        runners = { 'sess': tf.Session(), 'coord': tf.train.Coordinator() }
        start_prefetch = utils.get_data_prefetch_threads_init_fn(
            inputs, cfg, is_training=False, use_filename_queue=True )
        runners[ 'threads' ] = start_prefetch( runners[ 'sess' ], runners[ 'coord' ] )
        try:
            # The model just returns the input as output here; the loop is
            # for testing the data input only.
            for step in xrange( inputs[ 'max_steps' ] ):
                fetches = [ model[ 'input_batch' ],
                            model[ 'target_batch' ],
                            model[ 'data_idxs' ] ]
                input_batch, target_batch, data_idx = runners[ 'sess' ].run( fetches )

                if runners[ 'coord' ].should_stop():
                    break
        finally:
            # always shut the loader threads and the session down
            utils.request_data_loading_end( runners )
            utils.end_data_loading_and_sess( runners )

        elapsed_hours = (time.time() - started_at) / (60*60)
        print('time to train %d epochs: %.3f hrs' % (cfg['num_epochs'], elapsed_hours))
        print('avg time per epoch: %.3f hrs' % ( elapsed_hours / cfg['num_epochs']) )
Exemple #9
0
def test_single_dialog_existing_file_multiple_updates(user_updates_values,
                                                      prepare_config_file,
                                                      monkeypatch):
    """With two keys to update, every combination of yes/no answers must be
    honoured: a key ends up with the new value exactly when the user agreed."""
    new_value = "new_value"
    update_attrs = ["update_node", "parent.parent_update_node"]
    answer_1, answer_2 = user_updates_values
    config, config_text = prepare_config_file
    decisions = (answer_1, answer_2)

    # Script: overwrite the file, one yes(+value)/no per key, save the file
    test_input = ["Y"]
    for wants_update in decisions:
        test_input.extend(["Y", new_value] if wants_update else ["N"])
    test_input.append("Y")
    setup_input(monkeypatch, test_input)

    assert config_dialog(config, update_attrs)

    for wants_update, attr in zip(decisions, update_attrs):
        stored_value = get_attribute_by_path(
            attribute_path=attr,
            config=parse(config.read_text()))
        # The value should be updated <=> user said yes
        assert wants_update == (stored_value == new_value)
def run_to_task(task_to):
    """Run the selected task models over the video frames and export results.

    For every task in ``list_of_tasks`` that is selected on the command line
    (``args.task``, or ``args.idx`` with -1 meaning all tasks) this function
    loads the task config, restores the checkpoint, runs the model over
    ``inputs['max_steps'] - 1`` batches, post-processes the predictions per
    task type and writes one image per frame below ``/home/ubuntu/<task>``.
    The frames are then archived, uploaded to S3 and encoded with ffmpeg.

    Args:
        task_to: unused; kept for interface compatibility.

    Side effects:
        Rebinds the module-level ``synset`` for the classification tasks,
        creates directories and files under ``/home/ubuntu`` and runs
        ``tar``, ``aws`` and ``ffmpeg`` through the shell.
    """
    import general_utils
    from   general_utils import RuntimeDeterminedEnviromentVars
    import models.architectures as architectures
    from   data.load_ops import resize_rescale_image
    import utils
    from   data.task_data_loading import load_and_specify_preprocessors_for_representation_extraction
    import lib.data.load_ops as load_ops
    global synset
    # drop the leading id token from every ImageNet class name
    synset_1000 = [" ".join(i.split(" ")[1:]) for i in synset]
    # 0/1 selector over the lines of places_class_names.txt
    select = np.asarray([ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  1.,
        1.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,
        0.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,
        1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,
        0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,
        0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,  1.,
        0.,  1.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  1.,  0.,  0.,
        1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
        1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  1.,  0.,  1.,
        0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  1.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
        0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
        0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
        0.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.])

    # keep only the selected class names, trimming 4 leading and 1 trailing character
    with open('/home/ubuntu/task-taxonomy-331b/lib/data/places_class_names.txt', 'r') as fp:
        synset_places = [x.rstrip()[4:-1] for x,y in zip(fp.readlines(), select) if y == 1.]

    tf.logging.set_verbosity(tf.logging.ERROR)

    args = parser.parse_args()
    # Compare by value: identity against a string literal is unreliable
    if args.task != 'NONE':
        args.idx = list_of_tasks.index(args.task)
    for idx, task in enumerate(list_of_tasks):
        # args.idx == -1 means "run every task"
        if idx != args.idx and args.idx != -1:
            continue
        if task == 'class_places':
            synset = synset_places
        elif task == 'class_1000':
            synset = synset_1000
        print("Doing {task}".format(task=task))
        general_utils = importlib.reload(general_utils)
        tf.reset_default_graph()
        training_runners = { 'sess': tf.InteractiveSession(), 'coord': tf.train.Coordinator() }

        CONFIG_DIR = '/home/ubuntu/task-taxonomy-331b/experiments/final/{TASK}'.format(TASK=task)

        ############## Load Configs ##############
        cfg = utils.load_config( CONFIG_DIR, nopause=True )
        RuntimeDeterminedEnviromentVars.register_dict( cfg )
        split_file = os.path.join('/home/ubuntu/task-taxonomy-331b/assets/aws_data/', 'video2_info.pkl')
        cfg['train_filenames'] = split_file
        cfg['val_filenames'] = split_file
        cfg['test_filenames'] = split_file

        cfg['num_epochs'] = 2
        cfg['randomize'] = False
        root_dir = cfg['root_dir']
        cfg['num_read_threads'] = 1
        print(cfg['log_root'])
        cfg['model_path'] = os.path.join(
                cfg['log_root'],
                task,
                'model.permanent-ckpt'
            )

        print( cfg['model_path'])
        if cfg['model_path'] is None:
            continue
        cfg['dataset_dir'] = '/home/ubuntu'
        cfg['preprocess_fn'] = load_and_specify_preprocessors_for_representation_extraction
        ############## Set Up Inputs ##############
        inputs = utils.setup_input( cfg, is_training=ON_TEST_SET, use_filename_queue=False ) # is_training determines whether to use train/validation
        RuntimeDeterminedEnviromentVars.load_dynamic_variables( inputs, cfg )
        RuntimeDeterminedEnviromentVars.populate_registered_variables()

        ############## Set Up Model ##############
        model = utils.setup_model( inputs, cfg, is_training=IN_TRAIN_MODE )
        m = model[ 'model' ]
        model[ 'saver_op' ].restore( training_runners[ 'sess' ], cfg[ 'model_path' ] )

        ############## Start dataloading workers ##############
        data_prefetch_init_fn = utils.get_data_prefetch_threads_init_fn(
            inputs, cfg, is_training=ON_TEST_SET, use_filename_queue=False )

        prefetch_threads = threading.Thread(
            target=data_prefetch_init_fn,
            args=( training_runners[ 'sess' ], training_runners[ 'coord' ] ))
        prefetch_threads.start()

        list_of_fname = np.load('/home/ubuntu/task-taxonomy-331b/assets/aws_data/video2_fname.npy')

        # Create each output directory independently so that an already
        # existing task directory does not prevent the vidN ones from
        # being created.
        for sub_dir in ('vid1', 'vid2', 'vid3', 'vid4'):
            os.makedirs(os.path.join('/home/ubuntu', task, sub_dir), exist_ok=True)

        # last segmentation frame written; used to keep PCA colours stable
        curr_fit_img = np.zeros((256,256,3))
        # sliding window of the most recent flattened embeddings
        embeddings = []
        ############## Run First Batch ##############

        for step_num in range(inputs['max_steps'] - 1):
            print(step_num)
            if not hasattr(m, 'masks'):
                (
                    input_batch, target_batch,
                    data_idx,
                    predicted, loss,
                ) = training_runners['sess'].run( [
                    m.input_images, m.targets,
                    model[ 'data_idxs' ],
                    m.decoder_output, m.total_loss] )
                mask_batch = 1.
            else:
                (
                    input_batch, target_batch, mask_batch,
                    data_idx,
                    predicted, loss,
                ) = training_runners['sess'].run( [
                    m.input_images, m.targets, m.masks,
                    model[ 'data_idxs' ],
                    m.decoder_output, m.total_loss] )

            if task == 'segment2d' or task == 'segment25d':
                from sklearn.decomposition import PCA
                x = np.zeros((32,256,256,3), dtype='float')
                k_embed = 8
                for i in range(predicted.shape[0]):
                    embedding_flattened = np.squeeze(predicted[i]).reshape((-1,64))
                    embeddings.append(embedding_flattened)
                    if len(embeddings) > k_embed:
                        embeddings.pop(0)
                    # fit the projection on the recent window of frames
                    pca = PCA(n_components=3)
                    pca.fit(np.vstack(embeddings))
                    min_order = None
                    min_dist = float('inf')
                    copy_of_comp = np.copy(pca.components_)
                    # pick the component ordering whose rendering is closest
                    # to the previously written frame
                    for order in itertools.permutations([0,1,2]):
                        pca.components_ = copy_of_comp[order, :]
                        lower_dim = pca.transform(embedding_flattened).reshape((256,256,-1))
                        lower_dim = (lower_dim - lower_dim.min()) / (lower_dim.max() - lower_dim.min())
                        dist = np.linalg.norm(lower_dim - curr_fit_img)
                        if dist < min_dist:
                            min_order = order
                            min_dist = dist
                    pca.components_ = copy_of_comp[min_order, :]
                    lower_dim = pca.transform(embedding_flattened).reshape((256,256,-1))
                    lower_dim = (lower_dim - lower_dim.min()) / (lower_dim.max() - lower_dim.min())
                    curr_fit_img = np.copy(lower_dim)
                    x[i] = lower_dim
                predicted = x
            if task == 'curvature':
                std = [31.922, 21.658]
                mean = [123.572, 120.1]
                predicted = (predicted * std) + mean
                predicted[:,0,0,:] = 0.
                predicted[:,1,0,:] = 1.
                predicted = np.squeeze(np.clip(predicted.astype(int) / 255., 0., 1. )[:,:,:,0])

            just_rescale = ['autoencoder', 'denoise', 'edge2d',
                            'edge3d', 'keypoint2d', 'keypoint3d',
                            'reshade', 'rgb2sfnorm']
            if task in just_rescale:
                # map from [-1, 1] to [0, 1]
                predicted = (predicted + 1.) / 2.
                predicted = np.clip(predicted, 0., 1.)
                predicted[:,0,0,:] = 0.
                predicted[:,1,0,:] = 1.

            just_clip = ['rgb2depth', 'rgb2mist']
            if task in just_clip:
                predicted[:,0,0,:] = 0.
                predicted[:,1,0,:] = 1.

            if task == 'segmentsemantic_rb':
                label = np.argmax(predicted, axis=-1)
                COLORS = ('white','red', 'blue', 'yellow', 'magenta',
                        'green', 'indigo', 'darkorange', 'cyan', 'pink',
                        'yellowgreen', 'black', 'darkgreen', 'brown', 'gray',
                        'purple', 'darkviolet')
                rgb = (input_batch + 1.) / 2.
                preds = [color.label2rgb(np.squeeze(x), np.squeeze(y), colors=COLORS, kind='overlay')[np.newaxis,:,:,:] for x,y in zip(label, rgb)]
                predicted = np.vstack(preds)

            if task in ['class_1000', 'class_places']:
                # classification: render the top-5 class names as an image
                for file_idx, predict_output in zip(data_idx, predicted):
                    to_store_name = list_of_fname[file_idx].decode('utf-8').replace('video', task)
                    to_store_name = os.path.join('/home/ubuntu', to_store_name)
                    sorted_pred = np.argsort(predict_output)[::-1]
                    top_5_pred = [synset[sorted_pred[i]] for i in range(5)]
                    to_print_pred = "Top 5 prediction: \n {}\n {}\n {}\n {} \n {}".format(*top_5_pred)
                    img = Image.new('RGBA', (400, 200), (255, 255, 255))
                    d = ImageDraw.Draw(img)
                    fnt = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSerifCondensed.ttf', 25)
                    d.text((20, 5), to_print_pred, fill=(255, 0, 0), font=fnt)
                    img.save(to_store_name, 'PNG')
            else:
                # image-like outputs: store each prediction as a frame
                for file_idx, predict_output in zip(data_idx, predicted):
                    to_store_name = list_of_fname[file_idx].decode('utf-8').replace('video', task)
                    to_store_name = os.path.join('/home/ubuntu', to_store_name)
                    scipy.misc.toimage(np.squeeze(predict_output), cmin=0.0, cmax=1.0).save(to_store_name)

        # archive and upload the frames, then encode and upload the vid2 clip
        subprocess.call('tar -czvf /home/ubuntu/{t}.tar.gz /home/ubuntu/{t}'.format(t=task), shell=True)
        subprocess.call('aws s3 cp /home/ubuntu/{t}.tar.gz s3://task-preprocessing-512-oregon/video2/'.format(t=task), shell=True)
        subprocess.call('ffmpeg -r 29.97 -f image2 -s 256x256 -i /home/ubuntu/{t}/vid2/020%04d.png -vcodec libx264 -crf 15  -pix_fmt yuv420p {t}_2.mp4'.format(t=task), shell=True)
        subprocess.call('aws s3 cp {t}_2.mp4 s3://task-preprocessing-512-oregon/video2/'.format(t=task), shell=True)

        ############## Clean Up ##############
        training_runners[ 'coord' ].request_stop()
        training_runners[ 'coord' ].join()

        print("Done: {}".format(task))

        ############## Reset graph and paths ##############
        tf.reset_default_graph()
        training_runners['sess'].close()

    return
def run_rand_baseline( args, cfg, given_task ):
    """Estimate the loss obtained by predicting a randomly chosen target.

    All target batches are read once, a resampled (with replacement) copy of
    them is built, and the model's first loss is evaluated batch by batch
    with the true targets fed as ``target_batch`` and the resampled ones fed
    as the model's ``final_output``. The summed loss, the number of batches
    and their ratio are printed.

    Args:
        args: unused; kept for interface compatibility.
        cfg: configuration dict; ``cfg['batch_size']`` is read here, the
            rest is passed on to the setup helpers.
        given_task: unused; kept for interface compatibility.
    """
    # set up logging
    tf.logging.set_verbosity( tf.logging.INFO )

    with tf.Graph().as_default():
        # create ops and placeholders
        inputs = utils.setup_input( cfg, is_training=False, use_filename_queue=False )
        RuntimeDeterminedEnviromentVars.load_dynamic_variables( inputs, cfg )
        RuntimeDeterminedEnviromentVars.populate_registered_variables()

        # build model (and losses and train_op)
        model = utils.setup_model( inputs, cfg, is_training=False )

        # set up metrics to evaluate
        names_to_values, names_to_updates = setup_metrics( inputs, model, cfg )

        utils.print_start_info( cfg, inputs[ 'max_steps' ], is_training=False )

        # start session
        training_runners = { 'sess': tf.Session(), 'coord': tf.train.Coordinator() }
        try:
            utils.print_start_info( cfg, inputs[ 'max_steps' ], is_training=False )

            data_prefetch_init_fn = utils.get_data_prefetch_threads_init_fn( inputs, cfg, is_training=False, use_filename_queue=False )
            prefetch_threads = threading.Thread(
                target=data_prefetch_init_fn,
                args=( training_runners[ 'sess' ], training_runners[ 'coord' ] ))
            prefetch_threads.start()

            # Collect the batches in a list and concatenate once at the end:
            # growing an array with np.append copies it on every step.
            target_chunks = [ training_runners['sess'].run( inputs[ 'target_batch' ] ) ]

            # run the remaining examples
            for step in range( inputs[ 'max_steps' ] - 1 ):
                if step % 100 == 0:
                    print( 'Step {0} of {1}'.format( step, inputs[ 'max_steps' ] - 1 ))

                target_chunks.append( training_runners['sess'].run( inputs[ 'target_batch' ] ) )

                if training_runners['coord'].should_stop():
                    break

            targets = np.concatenate( target_chunks, axis=0 )

            # resample the targets with replacement to act as "predictions"
            rand_idx = [random.randint(0, targets.shape[0] - 1) for i in range(targets.shape[0])]
            rand_target = [targets[i] for i in rand_idx]
            rand_target = np.vstack(rand_target)

            num_batches = 0
            total_loss = 0
            for step in range( inputs[ 'max_steps' ] - 1 ):
                if step % 100 == 0:
                    print( 'Step {0} of {1}'.format( step, inputs[ 'max_steps' ] - 1 ))

                batch_start = step * cfg['batch_size']
                batch_end = (step + 1) * cfg['batch_size']
                tar = targets[batch_start:batch_end]
                rand = rand_target[batch_start:batch_end]

                losses = training_runners['sess'].run( model['model'].losses, feed_dict={
                    inputs['target_batch']: tar, model['model'].final_output:rand})
                total_loss += losses[0]
                num_batches += 1

                if training_runners['coord'].should_stop():
                    break

            print(total_loss)
            print(num_batches)
            if num_batches:
                print('random_baseline has loss: {loss}'.format(loss=total_loss/num_batches))
            else:
                # max_steps of 1 leaves nothing to average over
                print('random_baseline has no evaluated batches; loss is undefined')

        finally:
            utils.request_data_loading_end( training_runners )
            utils.end_data_loading_and_sess( training_runners )
Exemple #12
0
def run_training(cfg, cfg_dir, args):
    """Stream the target batches and accumulate a summary statistic of them.

    The meter is chosen from ``args.stat_type`` ("mean", "median",
    "marginal", "dense_marginal" or "moments"). Each step fetches the target
    and mask batches, averages them over axis 0 and feeds them to the meter;
    the result is written with ``save_moments`` for "moments" and with
    ``save_data`` otherwise.

    Args:
        cfg: configuration dict passed to the setup helpers and the meters.
        cfg_dir: unused here; kept for interface compatibility.
        args: parsed options; ``stat_type``, ``not_one_hot``, ``data_split``
            and ``print_every`` are read.

    Raises:
        NotImplementedError: if ``args.stat_type`` names no known meter.
    """
    stat_type = args.stat_type
    if stat_type == "mean":
        statistic = MeanMeter(cfg)
    elif stat_type == 'median':
        statistic = MedianMeter(cfg)
    elif stat_type == 'marginal':
        statistic = DiscreteDistributionMeter(cfg, args.not_one_hot)
    elif stat_type == 'dense_marginal':
        statistic = DenseDiscreteDistributionMeter(cfg)
    elif stat_type == 'moments':
        statistic = MomentsMeter(cfg)
    else:
        message = "No average defined for type: {}".format(stat_type)
        raise NotImplementedError(message)

    # set up logging
    tf.logging.set_verbosity(tf.logging.ERROR)

    with tf.Graph().as_default():
        # create ops and placeholders
        inputs = utils.setup_input(cfg, is_training=False)
        RuntimeDeterminedEnviromentVars.load_dynamic_variables(inputs, cfg)
        RuntimeDeterminedEnviromentVars.populate_registered_variables()

        max_steps = get_max_steps(inputs['max_steps'], args.data_split)
        utils.print_start_info(cfg, max_steps, is_training=False)
        prefetch_init_fn = utils.get_data_prefetch_threads_init_fn(
            inputs, cfg, is_training=False)
        training_runners = {
            'sess': tf.Session(),
            'coord': tf.train.Coordinator()
        }
        sess = training_runners['sess']
        coord = training_runners['coord']

        # load data on a background thread while this one consumes it
        loader_thread = threading.Thread(target=prefetch_init_fn,
                                         args=(sess, coord))
        loader_thread.start()

        # one target batch is fetched up front; its value is not used below
        target_batch = sess.run(inputs['target_batch'])
        try:
            start_time = time.time()
            batch_time = time.time()
            report_every = int(args.print_every)
            for step in range(max_steps):
                fetched = sess.run(
                    [inputs['target_batch'], inputs['mask_batch']])
                target_batch, mask_batch = fetched
                target_batch = map_to_img(target_batch.mean(axis=0), cfg)
                # a mask with more than one dimension is averaged like the
                # targets; anything else is replaced by the constant 1
                mask_batch = (mask_batch.mean(axis=0)
                              if len(mask_batch.shape) > 1 else 1)

                statistic.update(target_batch, mask_batch)

                steps_done = step + 1
                if steps_done % report_every == 0:
                    seconds_per_step = (time.time() - batch_time) / report_every
                    print('Step %d/%d: %.2f s/step ' %
                          (steps_done, max_steps, seconds_per_step))
                    batch_time = time.time()
                if coord.should_stop():
                    break

            elapsed_hours = (time.time() - start_time) / (60 * 60)
            print('time to train %d epochs: %.3f hrs' %
                  (cfg['num_epochs'], elapsed_hours))
            print('avg time per epoch: %.3f hrs' %
                  (elapsed_hours / cfg['num_epochs']))

            if stat_type == 'moments':
                save_moments(statistic, cfg, args)
            else:
                save_data(statistic, cfg, args)
        finally:
            # always shut the loader threads and the session down
            utils.request_data_loading_end(training_runners)
            utils.end_data_loading_and_sess(training_runners)
Exemple #13
0
def run_training(cfg):
    """Restore a checkpoint and collect encoder outputs for every batch.

    Despite its name, this function builds the graph with
    ``is_training=False``, restores ``cfg['model_path']`` and then runs
    ``inputs['max_steps']`` batches, stacking ``model['model'].encoder_output``
    along axis 0.  The stacked array is only reported (its shape is printed);
    it is neither returned nor saved.

    Args:
        cfg: configuration mapping.  Keys read here: ``'model_path'``,
            ``'batch_size'`` and ``'num_epochs'``; ``'randomize'`` is
            overwritten with ``False``.

    Returns:
        None.  Returns early, before a session is opened, when
        ``cfg['model_path']`` is ``None``.
    """
    # set up logging
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # create ops and placeholders
        inputs = utils.setup_input(cfg,
                                   is_training=False,
                                   use_filename_queue=True)
        RuntimeDeterminedEnviromentVars.load_dynamic_variables(inputs, cfg)
        RuntimeDeterminedEnviromentVars.populate_registered_variables()

        # build model (and losses and train_op)
        model = setup_model(inputs, cfg, is_training=False)

        # The returned dicts are unused here; the call is kept in case it
        # registers ops on the graph -- TODO confirm and drop if it does not.
        setup_metrics(inputs, model, cfg)

        start_time = time.time()
        utils.print_start_info(cfg, inputs['max_steps'], is_training=False)

        # Validate before opening a session so the early return leaks nothing.
        if cfg['model_path'] is None:
            print('Please specify a checkpoint directory')
            return
        cfg['randomize'] = False

        # start session and restore model
        training_runners = {
            'sess': tf.Session(),
            'coord': tf.train.Coordinator()
        }
        sess = training_runners['sess']
        coord = training_runners['coord']
        model['saver_op'].restore(sess, cfg['model_path'])

        utils.print_start_info(cfg, inputs['max_steps'], is_training=False)

        data_prefetch_init_fn = utils.get_data_prefetch_threads_init_fn(
            inputs, cfg, is_training=False, use_filename_queue=True)
        training_runners['threads'] = data_prefetch_init_fn(sess, coord)

        # All five tensors are fetched together on every step; only the
        # encoder output (index 0 of the result list) is accumulated.
        fetches = [
            model['model'].encoder_output, inputs['input_batch'],
            inputs['target_batch'], inputs['data_idxs'],
            inputs['mask_batch']
        ]

        try:
            first_output = sess.run(fetches)[0]
            # The shape is wrapped in a 1-tuple so that %-formatting treats it
            # as a single argument instead of unpacking its dimensions.
            print('Got first batch representation with size:%s' %
                  (first_output.shape, ))

            # Collect per-batch outputs and concatenate once at the end
            # rather than re-copying the growing array on every step.
            collected = [first_output]
            for step in range(inputs['max_steps'] - 1):
                collected.append(sess.run(fetches)[0])

                if coord.should_stop():
                    break
            representations = np.concatenate(collected, axis=0)

            print(
                'The size of representations is %s while we expect it to run for %d steps with batchsize %d'
                % (representations.shape, inputs['max_steps'],
                   cfg['batch_size']))
        finally:
            # Stop the prefetch threads and close the session even when a
            # step raises.
            utils.request_data_loading_end(training_runners)
            utils.end_data_loading_and_sess(training_runners)

        end_train_time = time.time() - start_time
        print('time to train %d epochs: %.3f hrs' %
              (cfg['num_epochs'], end_train_time / (60 * 60)))
        print('avg time per epoch: %.3f hrs' %
              ((end_train_time / (60 * 60)) / cfg['num_epochs']))
Exemple #14
0
def run_val_test(cfg):
    """Restore the task's permanent checkpoint and print mean loss/accuracy.

    Builds the input pipeline and model with ``is_training=False``, restores
    ``model.permanent-ckpt`` from the hard-coded
    ``/home/ubuntu/s3/model_log_final/<cfg['task_name']>`` directory, then
    runs ``inputs['max_steps']`` steps while keeping running averages of
    ``m.losses[0]`` (and of ``m.accuracy`` when requested).  Progress is
    printed every 100 steps and the final averages once at the end.

    Args:
        cfg: configuration mapping.  Keys read here: ``'task_name'``,
            ``'num_epochs'`` and the optional ``'return_accuracy'`` flag.

    Returns:
        None; results are only printed.

    NOTE(review): the session, coordinator and prefetch thread created here
    are not stopped or closed before returning.
    """
    # set up logging
    tf.logging.set_verbosity(tf.logging.INFO)

    tf.reset_default_graph()
    training_runners = {
        'sess': tf.InteractiveSession(),
        'coord': tf.train.Coordinator()
    }
    # create ops and placeholders
    inputs = utils.setup_input(cfg, is_training=False)
    RuntimeDeterminedEnviromentVars.load_dynamic_variables(inputs, cfg)
    RuntimeDeterminedEnviromentVars.populate_registered_variables()

    # build model (and losses and train_op)
    model = utils.setup_model(inputs, cfg, is_training=False)

    # The checkpoint location is fixed; only the task name comes from cfg.
    model_path = os.path.join('/home/ubuntu/s3/model_log_final',
                              cfg['task_name'], 'model.permanent-ckpt')
    model['saver_op'].restore(training_runners['sess'], model_path)
    m = model['model']

    start_time = time.time()
    utils.print_start_info(cfg, inputs['max_steps'], is_training=False)

    data_prefetch_init_fn = utils.get_data_prefetch_threads_init_fn(
        inputs, cfg, is_training=False, use_filename_queue=False)

    # Data loading runs on a background thread while this thread evaluates.
    prefetch_threads = threading.Thread(target=data_prefetch_init_fn,
                                        args=(training_runners['sess'],
                                              training_runners['coord']))
    prefetch_threads.start()

    print("Dataloading workers dispatched....")

    # Must stay a plain flag: a trailing comma here would turn it into a
    # 1-tuple, which is always truthy and would force the accuracy branch.
    return_accuracy = 'return_accuracy' in cfg and cfg['return_accuracy']

    losses_mean = AverageMeter()
    accuracy_mean = AverageMeter()
    for step in range(inputs['max_steps']):
        if return_accuracy:
            (data_idx, loss, accuracy) = training_runners['sess'].run(
                [model['data_idxs'], m.losses[0], m.accuracy])
            losses_mean.update(loss)
            accuracy_mean.update(accuracy)
            if step % 100 == 0:
                print(
                    'Step: {step} with Current Losses mean: {loss}; with accuracy: {accur}'
                    .format(step=step,
                            loss=losses_mean.avg,
                            accur=accuracy_mean.avg))
        else:
            (data_idx, loss) = training_runners['sess'].run(
                [model['data_idxs'], m.losses[0]])
            losses_mean.update(loss)
            if step % 100 == 0:
                print('Step: {step} with Current Losses mean: {loss}'.format(
                    step=step, loss=losses_mean.avg))
    if return_accuracy:
        print('Final Losses mean: {loss}; with accuracy: {accur}'.format(
            loss=losses_mean.avg, accur=accuracy_mean.avg))
    else:
        print('Final Losses mean: {loss}'.format(loss=losses_mean.avg))

    end_train_time = time.time() - start_time
    print('time to train %d epochs: %.3f hrs' %
          (cfg['num_epochs'], end_train_time / (60 * 60)))
    print('avg time per epoch: %.3f hrs' % ((end_train_time /
                                             (60 * 60)) / cfg['num_epochs']))
Exemple #15
0
def run_training(cfg, cfg_dir):
    """Train the configured model, or just exercise the input pipeline.

    Builds the input pipeline and model with ``is_training=True``.  When
    ``cfg['model_type'] == 'empty'`` no training happens: batches are simply
    pulled through a plain session for ``inputs['max_steps']`` steps to test
    data loading.  Otherwise the work is delegated to ``train(...)`` with the
    ops and hooks provided by ``model``.  Elapsed time is printed at the end.

    Args:
        cfg: configuration mapping.  Keys read here: ``'model_type'``,
            ``'log_dir'``, ``'num_epochs'``, ``'summary_save_every_secs'``,
            ``'checkpoint_save_every_secs'`` and the optional
            ``'return_accuracy'`` flag.
        cfg_dir: forwarded unchanged to ``train`` as ``cfg_dir``.

    Returns:
        None.
    """
    # set up logging
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # create ops and placeholders
        inputs = utils.setup_input(cfg, is_training=True)
        RuntimeDeterminedEnviromentVars.load_dynamic_variables(inputs, cfg)
        RuntimeDeterminedEnviromentVars.populate_registered_variables()

        # build model (and losses and train_op)
        model = utils.setup_model(inputs, cfg, is_training=True)

        # execute training
        start_time = time.time()
        utils.print_start_info(cfg, inputs['max_steps'], is_training=True)
        if cfg['model_type'] == 'empty':  # Can't use tf slim because not trainable variables
            training_runners = {
                'sess': tf.Session(),
                'coord': tf.train.Coordinator()
            }
            data_prefetch_init_fn = utils.get_data_prefetch_threads_init_fn(
                inputs, cfg, is_training=True)
            training_runners['threads'] = data_prefetch_init_fn(
                training_runners['sess'], training_runners['coord'])
            try:
                # This just pulls the input through as output. It is for
                # testing data input only, so the fetched values are dropped.
                for step in range(inputs['max_steps']):
                    training_runners['sess'].run([
                        model['input_batch'], model['target_batch'],
                        model['data_idxs']
                    ])

                    if training_runners['coord'].should_stop():
                        break
            finally:
                utils.request_data_loading_end(training_runners)
                utils.end_data_loading_and_sess(training_runners)
        else:  # Use tf.slim
            train_log_dir = os.path.join(cfg['log_dir'], 'slim-train')
            permanent_checkpoint_dir = os.path.join(cfg['log_dir'],
                                                    'checkpoints')

            # Let TensorFlow grow GPU memory on demand instead of reserving
            # it all up front.
            session_config = tf.ConfigProto()
            session_config.gpu_options.allow_growth = True

            train(
                model['train_op'],
                train_log_dir,
                utils.get_data_prefetch_threads_init_fn(inputs,
                                                        cfg,
                                                        is_training=True),
                train_step_fn=model['train_step_fn'],
                train_step_kwargs=model['train_step_kwargs'],
                global_step=model['global_step'],
                number_of_steps=inputs['max_steps'],
                number_of_epochs=cfg['num_epochs'],
                init_fn=model['init_fn'],
                # Roughly twice per epoch, assuming max_steps spans
                # num_epochs epochs -- TODO confirm.
                save_checkpoint_every=inputs['max_steps'] //
                (cfg['num_epochs'] * 2),
                cfg_dir=cfg_dir,
                permanent_checkpoint_dir=permanent_checkpoint_dir,
                save_summaries_secs=cfg['summary_save_every_secs'],
                save_interval_secs=cfg['checkpoint_save_every_secs'],
                saver=model['saver_op'],
                return_accuracy='return_accuracy' in cfg
                and cfg['return_accuracy'],
                session_config=session_config)

        end_train_time = time.time() - start_time
        print('time to train %d epochs: %.3f hrs' %
              (cfg['num_epochs'], end_train_time / (60 * 60)))
        print('avg time per epoch: %.3f hrs' %
              ((end_train_time / (60 * 60)) / cfg['num_epochs']))
def run_to_task(task_to):
    """Run one batch through each task's model and pickle the results.

    For every task in the module-level ``list_of_tasks`` that is not already
    present in ``viz_output_single_task.pkl``, this loads the task config,
    restores its ``model.permanent-ckpt`` checkpoint, evaluates a single
    batch and stores input / target / mask / data indices / prediction under
    the task name.  The pickle is rewritten and uploaded after every task so
    that an interrupted run can resume where it stopped.

    Args:
        task_to: not used by the body; the tasks processed come from
            ``list_of_tasks``.

    Returns:
        None.
    """
    import general_utils
    from general_utils import RuntimeDeterminedEnviromentVars
    import models.architectures as architectures
    from data.load_ops import resize_rescale_image
    import utils
    from data.task_data_loading import load_and_specify_preprocessors_for_representation_extraction
    import lib.data.load_ops as load_ops
    import os
    tf.logging.set_verbosity(tf.logging.ERROR)

    all_outputs = {}
    pickle_dir = 'viz_output_single_task.pkl'
    # Resume from an earlier run.  NOTE(review): unpickling can execute
    # arbitrary code, so this file must only ever be one written below.
    if os.path.isfile(pickle_dir):
        with open(pickle_dir, 'rb') as fp:
            all_outputs = pickle.load(fp)

    for task in list_of_tasks:
        if task in all_outputs:
            print("{} already exists....\n\n\n".format(task))
            continue
        print("Doing {task}".format(task=task))
        general_utils = importlib.reload(general_utils)
        tf.reset_default_graph()
        training_runners = {
            'sess': tf.InteractiveSession(),
            'coord': tf.train.Coordinator()
        }

        CONFIG_DIR = '/home/ubuntu/task-taxonomy-331b/experiments/final/{TASK}'.format(
            TASK=task)

        ############## Load Configs ##############
        cfg = utils.load_config(CONFIG_DIR, nopause=True)
        RuntimeDeterminedEnviromentVars.register_dict(cfg)
        # Point every split at the same file list so that whichever split
        # the input pipeline picks, it reads the chosen evaluation set.
        split_file = cfg['test_filenames'] if ON_TEST_SET else cfg[
            'val_filenames']
        cfg['train_filenames'] = split_file
        cfg['val_filenames'] = split_file
        cfg['test_filenames'] = split_file

        cfg['num_epochs'] = 1
        cfg['randomize'] = False
        cfg['num_read_threads'] = 1
        print(cfg['log_root'])
        if task == 'jigsaw':
            # Skipped task: release the session opened above before moving on.
            training_runners['sess'].close()
            continue
        cfg['model_path'] = os.path.join(cfg['log_root'], task,
                                         'model.permanent-ckpt')

        print(cfg['model_path'])
        if cfg['model_path'] is None:
            training_runners['sess'].close()
            continue

        ############## Set Up Inputs ##############
        inputs = utils.setup_input(
            cfg, is_training=ON_TEST_SET, use_filename_queue=False
        )  # is_training determines whether to use train/validaiton
        RuntimeDeterminedEnviromentVars.load_dynamic_variables(inputs, cfg)
        RuntimeDeterminedEnviromentVars.populate_registered_variables()

        ############## Set Up Model ##############
        model = utils.setup_model(inputs, cfg, is_training=IN_TRAIN_MODE)
        m = model['model']
        model['saver_op'].restore(training_runners['sess'], cfg['model_path'])

        ############## Start dataloading workers ##############
        data_prefetch_init_fn = utils.get_data_prefetch_threads_init_fn(
            inputs, cfg, is_training=ON_TEST_SET, use_filename_queue=False)

        prefetch_threads = threading.Thread(target=data_prefetch_init_fn,
                                            args=(training_runners['sess'],
                                                  training_runners['coord']))
        prefetch_threads.start()

        ############## Run First Batch ##############
        if not hasattr(m, 'masks'):
            (
                input_batch,
                target_batch,
                data_idx,
                predicted,
                loss,
            ) = training_runners['sess'].run([
                m.input_images, m.targets, model['data_idxs'],
                m.decoder_output, m.total_loss
            ])
            # Models without a mask tensor get a scalar "keep everything".
            mask_batch = 1.
        else:
            (
                input_batch,
                target_batch,
                mask_batch,
                data_idx,
                predicted,
                loss,
            ) = training_runners['sess'].run([
                m.input_images, m.targets, m.masks, model['data_idxs'],
                m.decoder_output, m.total_loss
            ])

        if task == 'segment2d' or task == 'segment25d':
            # Project each 64-channel embedding down to 3 channels with PCA
            # and rescale to [0, 1] so it can be stored like an RGB image.
            # NOTE(review): the buffer is fixed at 32 x 256 x 256 -- assumes
            # batch size <= 32 and 256 x 256 outputs; confirm against cfg.
            from sklearn.decomposition import PCA
            x = np.zeros((32, 256, 256, 3), dtype='float')
            for i in range(predicted.shape[0]):
                embedding_flattened = np.squeeze(predicted[i]).reshape(
                    (-1, 64))
                pca = PCA(n_components=3)
                pca.fit(embedding_flattened)
                lower_dim = pca.transform(embedding_flattened).reshape(
                    (256, 256, -1))
                lower_dim = (lower_dim - lower_dim.min()) / (lower_dim.max() -
                                                             lower_dim.min())
                x[i] = lower_dim
            predicted = x

        ############## Clean Up ##############
        training_runners['coord'].request_stop()
        training_runners['coord'].join()

        ############## Store to dict ##############
        to_store = {
            'input': input_batch,
            'target': target_batch,
            'mask': mask_batch,
            'data_idx': data_idx,
            'output': predicted
        }
        all_outputs[task] = to_store

        print("Done: {}".format(task))

        ############## Reset graph and paths ##############
        tf.reset_default_graph()
        training_runners['sess'].close()
        # Forget the cached 'config' module so the next task's config file is
        # imported afresh; a missing entry is fine.
        sys.modules.pop('config', None)
        sys.path = remove_dups(sys.path)
        print("FINISHED: {}\n\n\n\n\n\n".format(task))
        with open(pickle_dir, 'wb') as fp:
            pickle.dump(all_outputs, fp)

        # subprocess.call signals a failed command through its return code,
        # not an exception, so the fallback copy has to be driven by that.
        try:
            upload_failed = subprocess.call(
                "aws s3 cp {} s3://task-preprocessing-512-oregon/visualizations/"
                .format(pickle_dir),
                shell=True) != 0
        except OSError:
            upload_failed = True
        if upload_failed:
            subprocess.call(
                "sudo cp {} /home/ubuntu/s3/visualizations/".format(
                    pickle_dir),
                shell=True)
Exemple #17
0
def run_extract_representations( args, cfg ):
    """Run the encoder over the whole input set and pickle its outputs.

    Restores the checkpoint ``logs/slim-train/model.ckpt-59690`` under
    ``args.cfg_dir``, runs ``inputs['max_steps']`` batches with
    ``is_training=False`` and writes a dict with keys ``'filenames'`` and
    ``'representations'`` to ``<args.cfg_dir>/../representations.pkl``.

    Args:
        args: object providing ``cfg_dir``, the experiment directory.
        cfg: configuration mapping.  ``'randomize'``, ``'num_epochs'`` and
            ``'model_path'`` are overwritten here; ``'batch_size'`` is read
            for the summary message.

    Returns:
        None.
    """
    # set up logging
    tf.logging.set_verbosity( tf.logging.INFO )

    with tf.Graph().as_default():
        cfg['randomize'] = False
        cfg['num_epochs'] = 1
        # The checkpoint step is fixed rather than taken from the latest
        # checkpoint in the directory.
        cfg['model_path'] = os.path.join( args.cfg_dir, "logs/slim-train/model.ckpt-59690")
        # create ops and placeholders
        inputs = utils.setup_input( cfg, is_training=False, use_filename_queue=True )
        RuntimeDeterminedEnviromentVars.load_dynamic_variables( inputs, cfg )
        RuntimeDeterminedEnviromentVars.populate_registered_variables()

        # build model (and losses and train_op)
        model = utils.setup_model( inputs, cfg, is_training=False )

        # The returned dicts are unused here; the call is kept in case it
        # registers ops on the graph -- TODO confirm and drop if it does not.
        setup_metrics( inputs, model, cfg )

        start_time = time.time()
        utils.print_start_info( cfg, inputs[ 'max_steps' ], is_training=False )

        # Validate before opening a session so that the early return neither
        # leaks a session nor runs cleanup on half-initialised runners.
        if cfg['model_path'] is None:
            print('Please specify a checkpoint directory')
            return

        # start session and restore model
        training_runners = { 'sess': tf.Session(), 'coord': tf.train.Coordinator() }
        sess = training_runners[ 'sess' ]
        coord = training_runners[ 'coord' ]
        try:
            model[ 'saver_op' ].restore( sess, cfg[ 'model_path' ] )

            utils.print_start_info( cfg, inputs[ 'max_steps' ], is_training=False )

            data_prefetch_init_fn = utils.get_data_prefetch_threads_init_fn( inputs, cfg, is_training=False, use_filename_queue=True )
            training_runners[ 'threads' ] = data_prefetch_init_fn( sess, coord )

            fetches = [ model['model'].encoder_output, inputs[ 'data_idxs' ] ]
            filepaths = inputs[ 'filepaths_list' ]

            # run one example so that we can report the representation size
            first_output, data_idx = sess.run( fetches )
            filenames = [ filepaths[ i ] for i in data_idx ]
            print( 'Got first batch representation with size: {0}'.format( first_output.shape ) )

            # run the remaining examples; outputs are collected in a list and
            # concatenated once instead of re-copying the array every step
            collected = [ first_output ]
            remaining_steps = inputs[ 'max_steps' ] - 1
            for step in range( remaining_steps ):
                if step % 100 == 0:
                    print( 'Step {0} of {1}'.format( step, remaining_steps ))
                encoder_output, data_idx = sess.run( fetches )
                collected.append( encoder_output )
                filenames.extend( filepaths[ i ] for i in data_idx )

                if coord.should_stop():
                    break
            representations = np.concatenate( collected, axis=0 )

            print('The size of representations is %s while we expect it to run for %d steps with batchsize %d' % (representations.shape, inputs['max_steps'], cfg['batch_size']))

            end_train_time = time.time() - start_time
            save_path = os.path.join( args.cfg_dir, '../representations.pkl' )
            with open( save_path, 'wb' ) as f:
                pickle.dump( { 'filenames': filenames, 'representations': representations }, f )
            print( 'saved representations to {0}'.format( save_path ))
            print('time to train %d epochs: %.3f hrs' % (cfg['num_epochs'], end_train_time/(60*60)))
            print('avg time per epoch: %.3f hrs' % ( (end_train_time/(60*60)) / cfg['num_epochs']) )
        finally:
            utils.request_data_loading_end( training_runners )
            utils.end_data_loading_and_sess( training_runners )
Exemple #18
0
def test_page_add_dialog(prepare_config_file, monkeypatch):
    """page_add_dialog succeeds and stores the entered page title.

    Five canned answers are fed to the dialog through the patched input;
    afterwards the config file must contain "title" at
    ``pages.page1.page_title``.
    """
    config, config_text = prepare_config_file
    canned_answers = ["title", "testfile", "", "SPC", "Y"]
    setup_input(monkeypatch, canned_answers)

    dialog_succeeded = page_add_dialog(config)
    assert dialog_succeeded

    # Re-read the file from disk so the check covers what was actually saved.
    saved_config = parse(config.read_text())
    saved_title = get_attribute_by_path("pages.page1.page_title",
                                        saved_config)
    assert saved_title == "title"