Example #1
    def deserialize(self, obj: dict) -> None:
        # for block in obj['blocks']:
        #     item = MetaCell()
        #     item.deserialize(block)
        #     self.cells.append(item)
        self.populate_from_embedding(obj['embedding'])
        self.model_name = obj['model_name']
        self.metrics = Metrics()
        self.metrics.deserialize(obj['metrics'])
        self.hyperparameters = Hyperparameters()
        self.hyperparameters.deserialize(obj['hyperparameters'])
        if 'parent_model_name' in obj:
            self.parent_model_name = obj['parent_model_name']
        else:
            self.parent_model_name = ''
Example #2
def test1():
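    # Builds a Hyperparameters object from explicit inputs, hash defaults, and
    # non-hash defaults, then checks nested integration, the hash partition, and
    # recursive get/set access via 'key:subkey' paths.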
    defaults = {
        'a': 'a1',
        'b': {
            'c': 'c1'
        },
        'd': 'd1',
    }

    non_hash = {'e': {'f': 'f1', 'g': 'g1'}}

    inputs = {
        'a': 'a2',  # This overrides a hash default
        'b': {
            'c': 'c2'
        },  # This overrides a hash default
        'e': {
            'g': 'g2'
        }  # This overrides a non-hash default
    }

    correct_integration = {
        'a': 'a2',
        'b': {
            'c': 'c2'
        },
        'd': 'd1',
        'e': {
            'f': 'f1',
            'g': 'g2',
        }
    }

    correct_hash = {'a': 'a2', 'b': {'c': 'c2'}}

    # Test integration of hps
    hps = Hyperparameters(inputs, defaults, non_hash)
    print_test_result(hps._integrated_hps, correct_integration,
                      'test 1a: simple nested hp integration with defaults')

    # Test partitioning for hashing
    print_test_result(hps.hps_to_hash, correct_hash,
                      'test 1b: hp partitioning for hashing')

    # Test base case __getattr__ (no recursion necessary)
    print_test_result(hps['a'], 'a2',
                      'test 1c: base __getattr__ (no recursion)')

    # Test recursive __getattr__
    print_test_result(hps['b:c'], 'c2', 'test 1d: recursive __getattr__')

    # Test recursive __setattr__
    hps['e:g'] = 'g2'
    print_test_result(hps['e:g'], 'g2', 'test 1e: recursive __setattr__')

    # Test recursive __setattr__ for originally non-existing item
    # (hopefully this never happens in practice)
    hps['e:h'] = 'h2'
    print_test_result(hps['e:h'], 'h2',
                      'test 1f: recursive __setattr__, non-existing item')
Example #3
def test3():
    # Test integrate_hps using the sub/super application
    # See docstring for integrate_hps
    super_def_hps = {'a': 1, 'b': 2, 'c_hps': {'d': 3, 'e': 4}}
    sub_def_hps = {'b': 5, 'c_hps': {'d': 6}}
    correct_result = {'a': 1, 'b': 5, 'c_hps': {'d': 6, 'e': 4}}
    result = Hyperparameters.integrate_hps(super_def_hps, sub_def_hps)

    print_test_result(result, correct_result,
                      'test 3a: subclass/superclass hp integration')

    print_test_result(
        super_def_hps, {
            'a': 1,
            'b': 2,
            'c_hps': {
                'd': 3,
                'e': 4
            }
        }, 'test 3b: integration effects on original superclass hps')

    print_test_result(sub_def_hps, {
        'b': 5,
        'c_hps': {
            'd': 6
        }
    }, 'test 3c: integration effects on original subclass hps')
Example #4
def verify_mutations():
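    # Repeatedly builds NASNet-style metamodels, applies several mutations to each,
    # and clears the Keras session, verifying that mutation and cleanup run without errors.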
    params = Hyperparameters()
    params.parameters['TRAIN_ITERATIONS'] = 1
    params.parameters['REDUCTION_EXPANSION_FACTOR'] = 2

    dataset = ImageDataset.get_cifar10_reduced()

    for i in range(50):
        model = MetaModel(params)
        model.populate_with_nasnet_metacells()
        model.build_model(dataset.images_shape)

        model.mutate()
        model.mutate()
        model.mutate()
        model.clear_model()
        tf.keras.backend.clear_session()
Example #5
def test_benchmark_models():
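    # Benchmarks a fixed set of hand-defined cell embeddings on a generated task dataset,
    # appending each embedding's accuracies to results.json as it finishes.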
    dir_path = os.path.join(evo_dir, 'cell_evo_benchmarks_6')
    results_path = os.path.join(dir_path, 'results.json')
    mods = [
        ObjectModifier.SizeModifier, ObjectModifier.PerspectiveModifier,
        ObjectModifier.RotationModifier, ObjectModifier.ColorModifier
    ]
    hyperparameters = Hyperparameters()
    cell_samples = 8

    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    # load dataset, or create a new one if one doesn't exist
    dataset_exists = os.path.exists(dir_path) and 'dataset.npy' in os.listdir(
        dir_path)
    if not dataset_exists:
        print('Generating dataset')
        # size, dim, num_classes, vertices per object, objects per image,
        DatasetGenerator.build_task_dataset(40000, (16, 16),
                                            10,
                                            4,
                                            10,
                                            dir_path,
                                            modifiers=mods,
                                            max_depth_of_target=1)
    dataset = DatasetGenerator.get_task_dataset(dir_path)

    embeddings = [
        MetaModel.get_nasnet_embedding(),
        MetaModel.get_s1_embedding(),
        MetaModel.get_identity_embedding(),
        MetaModel.get_m1_sep3_embedding(),
        MetaModel.get_m1_sep7_embedding(),
        MetaModel.get_m1_sep3_serial_embedding(),
    ]

    data = {'embeddings': [], 'accuracies': []}
    if os.path.exists(results_path):
        with open(results_path, 'r') as fl:
            data = json.load(fl)

    def save_data():
        with open(results_path, 'w+') as fl:
            json.dump(data, fl, indent=4)

    for e in embeddings:
        metamodel = MetaModel(hyperparameters)
        metamodel.populate_from_embedding(e)
        accuracies = test_model(metamodel, dataset, cell_samples)
        data['embeddings'].append(metamodel.get_embedding())
        data['accuracies'].append(accuracies)
        save_data()

    performances = [performance(x) for x in data['accuracies']]
    print(performances)
Example #6
def print_test_result(result, correct, name, verbose=False):
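    # Prints pass/fail for a single test case; on failure (or when verbose=True)
    # also prints the expected and actual values.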

    if result == correct:
        print('Passed: %s.' % name)
    else:
        print('Failed %s.' % name)
        verbose = True

    if verbose:

        if isinstance(correct, str):
            correct_str = correct
            result_str = result
        else:
            correct_str = Hyperparameters.printable_str_from_dict(correct)
            result_str = Hyperparameters.printable_str_from_dict(result)

        print('\nCorrect result:\n%s' % correct_str)
        print('\nHyperparameters result:\n%s' % result_str)
Example #7
    def __init__(self, hyperparameters: Hyperparameters = Hyperparameters()):
        super().__init__()
        self.cells: List[MetaCell] = []  # [NORMAL_CELL, REDUCTION_CELL]
        self.hyperparameters = hyperparameters

        self.model_name = 'evo_' + str(time.time())
        self.parent_model_name = ''
        self.metrics = Metrics()
        self.fitness = 0.

        self.keras_model: tf.keras.Model = None
        self.keras_model_data: ModelDataHolder = None
Example #8
def verify_load():
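    # Saves freshly built models (weights plus metadata) and reloads them with
    # MetaModel.load to verify that the save/load round trip works.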
    dir_path = os.path.join(evo_dir, 'test_load_v2')
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    params = Hyperparameters()
    params.parameters['TRAIN_ITERATIONS'] = 1
    params.parameters['REDUCTION_EXPANSION_FACTOR'] = 2

    dataset = ImageDataset.get_cifar10_reduced()

    for i in range(50):
        model = MetaModel(params)
        model.populate_with_nasnet_metacells()
        model.build_model(dataset.images_shape)
        model.save_model(dir_path)
        model.save_metadata(dir_path)
        model.clear_model()
        tf.keras.backend.clear_session()

        other_model = MetaModel.load(dir_path, model.model_name, True)

        tf.keras.backend.clear_session()
Example #9
def test4():
    # Same as test1, but also tests wildcard functionality
    defaults = {'a': 'a1', 'b': {'c': 'c1'}, 'd': 'd1'}

    non_hash = {'e': {'f': 'f1', 'g': 'g1'}}

    inputs = {
        '_wild1': 'w11',  # WILDCARD (no default, forced to hash)
        'a': 'a2',  # This overrides a hash default
        'b': {
            'c': 'c2'
        },  # This overrides a hash default
        'e': {
            'g': 'g2',  # This overrides a non-hash default
            '_wild2': 'w21'  # WILDCARD (no default, forced to hash)
        },
    }

    correct_hash = {
        '_wild1': 'w11',
        'a': 'a2',
        'b': {
            'c': 'c2'
        },
        'e': {
            '_wild2': 'w21'
        },
    }

    correct_integration = {
        '_wild1': 'w11',
        'a': 'a2',
        'b': {
            'c': 'c2'
        },
        'd': 'd1',
        'e': {
            'f': 'f1',
            'g': 'g2',
            '_wild2': 'w21'
        },
    }

    # Test integration of hps
    hps = Hyperparameters(inputs, defaults, non_hash)

    print_test_result(hps._integrated_hps, correct_integration,
                      'test 4a: integration with wildcard (_)')

    print_test_result(hps.hps_to_hash, correct_hash,
                      'test 4b: ensure wildcards are hashed')
Example #10
def cell_performance_test_1():
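    # Appears to be an unfinished experiment: builds a single-cell model and sketches a
    # Gram-matrix-based training loop, but the loss computation is still a TODO.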

    hyperparameters = Hyperparameters()

    dataset = ImageDataset.get_cifar10()

    def get_sorted(images, labels):
        sorted_by_class = [[] for _ in range(10)]
        for index in range(len(images)):
            sorted_by_class[labels[index, 0]].append(images[index, :, :])
        return sorted_by_class

    sorted_train = get_sorted(dataset.train_images, dataset.train_labels)
    sorted_test = get_sorted(dataset.test_images, dataset.test_labels)

    model = MetaModel(hyperparameters)

    # model.populate_with_nasnet_metacells()
    model.populate_from_embedding(MetaModel.get_nasnet_embedding())

    # model.build_model(dataset.images_shape)
    first_cell = CellDataHolder(3, 3, model.cells[0])

    cell_input = tf.keras.Input(dataset.images_shape)
    cell_output = first_cell.build([cell_input, cell_input])
    cell_model = tf.keras.Model(inputs=cell_input, outputs=cell_output)

    def gram_matrix(input_tensor):
        result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
        input_shape = tf.shape(input_tensor)
        num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
        return result / (num_locations)

    optimizer = tf.keras.optimizers.Adam(
        learning_rate=hyperparameters.parameters['LEARNING_RATE'])

    def loss(real_image, fake_image, output):
        real_maximize = None  # TODO: INNER PRODUCT
        fake_minimize = None

    def train_step(input_image_1, input_image_2):
        with tf.GradientTape() as tape:
            image_1_output = cell_model(input_image_1)
            image_2_output = cell_model(input_image_2)

            total_loss = loss(input_image_1,
                              input_image_2, image_1_output) + loss(
                                  input_image_2, input_image_1, image_2_output)

        # differentiate the computed loss value, not the loss function object
        gradient = tape.gradient(total_loss, cell_model.trainable_variables)
        optimizer.apply_gradients(zip(gradient,
                                      cell_model.trainable_variables))
Example #11
def test_nth_in_dir(dir_name, n: int):
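    # Loads benchmark results from dir_name, picks the n-th ranked embedding for each cell,
    # combines them into a single model, then trains and evaluates it on CIFAR-10.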
    dir_path = os.path.join(evo_dir, dir_name)
    data_path = os.path.join(dir_path, 'results.json')

    with open(data_path, 'r') as fl:
        data = json.load(fl)

    performances = [performance(x) for x in data['accuracies']]

    performances_with_indexes = [(performances[i], data['embeddings'][i])
                                 for i in range(len(performances))]
    num_cells = len(performances[0])  # should be 2
    pwi_per_cell = [performances_with_indexes.copy() for i in range(num_cells)]

    for i in range(num_cells):
        pwi_per_cell[i].sort(key=lambda x: x[0][i])

    selected_embeddings = [x[n][1] for x in pwi_per_cell]

    combined_embeddings = combine_embeddings(selected_embeddings[0],
                                             selected_embeddings[1])
    print(combined_embeddings)

    hyperparameters = Hyperparameters()
    hyperparameters.parameters['TRAIN_EPOCHS'] = 2
    hyperparameters.parameters['TRAIN_ITERATIONS'] = 16
    # hyperparameters.parameters['SGDR_EPOCHS_PER_RESTART'] = hyperparameters.parameters['TRAIN_ITERATIONS'] * hyperparameters.parameters['TRAIN_EPOCHS'] #effectively makes SGDR into basic cosine annealing

    dataset = ImageDataset.get_cifar10()

    metamodel = MetaModel(hyperparameters)
    metamodel.populate_from_embedding(combined_embeddings)
    metamodel.build_model(dataset.images_shape)
    metamodel.evaluate(dataset)
    metamodel.save_metadata(dir_path)
    metamodel.save_model(dir_path)
    metamodel.clear_model()
Example #12
def main():
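    # Trains or evaluates a BERT classifier on the 'head' labels (presumably those whose
    # frequency exceeds head_threshold), optionally fine-tuning from a saved checkpoint.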
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("-ds", "--dataset", default="AmazonCat-13K", type=str, required=True)
    parser.add_argument("-t", "--head_threshold", default=10000, type=int)
    parser.add_argument("-gpu", "--device_num", default='0', type=str)
    parser.add_argument("-train", "--is_train", default=1, type=int)
    parser.add_argument("-ep", "--epochs", default=8, type=int)
    parser.add_argument("-ft", "--fine_tune", default=0, type=int)
    parser.add_argument("-from", "--ft_from", default=0, type=int)
    args = parser.parse_args()

    ft = (args.fine_tune == 1)
    ft_from = args.ft_from
    hypes = Hyperparameters(args.dataset)
    ds_path = '../datasets/' + args.dataset
    device_num = args.device_num
    head_threshold = args.head_threshold
    with open(ds_path+'/mlc2seq/train_heads_X-'+str(head_threshold), 'rb') as g:
        trn_head_X = pkl.load(g)
    with open(ds_path+'/mlc2seq/train_heads_Y-'+str(head_threshold), 'rb') as g:
        trn_head_Y = pkl.load(g)
    with open(ds_path+'/mlc2seq/test_heads_X-'+str(head_threshold), 'rb') as g:
        test_head_X = pkl.load(g)
    with open(ds_path+'/mlc2seq/test_heads_Y-'+str(head_threshold), 'rb') as g:
        test_head_Y = pkl.load(g)
    answer = smat.load_npz(ds_path+'/Y.tst.npz')
    with open(ds_path+'/mlc2seq/heads-'+str(head_threshold), 'rb') as g:
        heads = pkl.load(g)
    output_dim = len(heads)
    bert = BertClassifier(hypes, heads, head_threshold, device_num, ft, args.epochs, max_seq_len=256)
    trn_X_path = ds_path+'/head_data/trn_X-' + str(head_threshold)
    test_X_path = ds_path+'/head_data/test_X-' + str(head_threshold)
    trn_X = load_data(trn_X_path, trn_head_X, bert)
    test_X = load_data(test_X_path, test_head_X, bert)
    print('Number of labels:', len(heads))
    print('Number of trn instances:', len(trn_X))
    if args.is_train:
        if ft:
            print('======================Start Fine-Tuning======================')
            model_path = '../save_models/head_classifier/'+hypes.dataset+'/t-'+str(head_threshold)+'_ep-' + str(ft_from)+'/pytorch_model.bin'
            bert.train(trn_X, trn_head_Y, test_X, test_head_Y, model_path, ft_from)
        else:
            print('======================Start Training======================')
            bert.train(trn_X, trn_head_Y, test_X, test_head_Y)
            # bert.save()
    else:
        model_path = '../save_models/head_classifier/'+args.dataset+'/t-'+str(head_threshold)+'_ep-'+str(ft_from)+'/pytorch_model.bin'
        print('======================Start Testing======================')
        bert.evaluate(test_X, test_head_Y, model_path)
Example #13
def get_flops_for_cell_models_from_embeddings():
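    # Builds a standalone model around each cell of each embedding and estimates its FLOPs
    # with the TF1-style profiler (eager execution is disabled for this).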
    tf.compat.v1.disable_eager_execution()
    dir_path = os.path.join(evo_dir, 'cell_evo_benchmarks')
    dataset = DatasetGenerator.get_task_dataset(dir_path)
    hyperparameters = Hyperparameters()

    embeddings = [
        MetaModel.get_nasnet_embedding(),
        MetaModel.get_s1_embedding(),
        MetaModel.get_identity_embedding(),
        MetaModel.get_m1_sep3_embedding(),
        MetaModel.get_m1_sep7_embedding()
    ]

    flops = []

    for e in embeddings:
        e_flops = []
        metamodel = MetaModel(hyperparameters)
        metamodel.populate_from_embedding(e)

        steps_per_epoch = math.ceil(
            len(dataset.train_labels) /
            metamodel.hyperparameters.parameters['BATCH_SIZE'])
        total_steps = metamodel.hyperparameters.parameters[
            'TRAIN_ITERATIONS'] * metamodel.hyperparameters.parameters[
                'TRAIN_EPOCHS'] * steps_per_epoch
        for meta_cell in metamodel.cells:
            drop_path_tracker = DropPathTracker(
                metamodel.hyperparameters.parameters['DROP_PATH_CHANCE'], 0,
                total_steps)
            first_cell = CellDataHolder(
                3, metamodel.hyperparameters.parameters['TARGET_FILTER_DIMS'],
                meta_cell, False, drop_path_tracker, 0.)

            cell_model = build_cell_model(
                first_cell, dataset.images_shape,
                metamodel.hyperparameters.parameters['TARGET_FILTER_DIMS'],
                metamodel.hyperparameters.parameters['MAXIMUM_LEARNING_RATE'])
            e_flops.append(
                get_flops_for_keras_model(cell_model, dataset.images_shape))
            tf.keras.backend.clear_session()
            del cell_model

        flops.append(e_flops)
        print(flops)

    print(flops)
Example #14
def test_model_accuracy_from_embedding(dir_name, embedding):
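    # Builds a model from the given embedding, trains and evaluates it on CIFAR-10, and
    # saves the model, its architecture graph, and metadata under dir_name.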
    dir_path = os.path.join(evo_dir, dir_name)
    # dataset = ImageDataset.get_cifar10_reduced()
    dataset = ImageDataset.get_cifar10()

    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    hyperparameters = Hyperparameters()

    model = MetaModel(hyperparameters)

    model.populate_from_embedding(embedding)

    model.build_model(dataset.images_shape)
    model.evaluate(dataset)
    model.save_model(dir_path)
    model.generate_graph(dir_path)
    model.save_metadata(dir_path)
    model.clear_model()
Example #15
def test_accuracy_at_different_train_amounts():
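    # Trains a population of randomly initialized models for many short iterations,
    # evaluating only the part of the population not already present in dir_path.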
    dir_path = os.path.join(evo_dir, 'test_accuracy_epochs')
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    hyperparameters = Hyperparameters()
    hyperparameters.parameters['POPULATION_SIZE'] = 32
    hyperparameters.parameters['ROUNDS'] = 0
    hyperparameters.parameters['TRAIN_EPOCHS'] = 1
    hyperparameters.parameters['TRAIN_ITERATIONS'] = 16

    dataset = ImageDataset.get_cifar10()

    existing_sims = [
        x for x in os.listdir(dir_path) if 'small' not in x and '.png' not in x
    ]

    num_already_done = len(existing_sims)
    num_remaining = hyperparameters.parameters[
        'POPULATION_SIZE'] - num_already_done
    total_todo = hyperparameters.parameters['POPULATION_SIZE']
    population = []
    for round_num in range(num_remaining):
        print(
            f'Evaluating model {round_num + 1 + num_already_done} of {total_todo}'
        )
        new_candidate = MetaModel(hyperparameters)
        new_candidate.populate_with_nasnet_metacells()
        new_candidate.model_name = 'evo_' + str(
            time.time()
        )  # this is redone here since all models in the initial population are created within microseconds of each other
        new_candidate.build_model(dataset.images_shape)
        new_candidate.evaluate(dataset)
        new_candidate.save_model(dir_path)
        # new_candidate.metrics.metrics['accuracy'].extend([x + round_num for x in range(4)])
        new_candidate.save_metadata(dir_path)
        population.append(new_candidate)
        new_candidate.clear_model()
Example #16
def main():
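    # Trains or evaluates a BERT regressor that maps documents into a label-embedding space
    # for the 'tail' labels, using either ELMo or (presumably node2vec) 'n2v' label embeddings.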
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("-ds", "--dataset", default="AmazonCat-13K", type=str, required=True)
    parser.add_argument("-t", "--tail_threshold", default=100, type=int)
    parser.add_argument("-le", "--label_embs", default='elmo', type=str)
    parser.add_argument("-gpu", "--device_num", default='1', type=str)
    parser.add_argument("-train", "--is_train", default=1, type=int)
    parser.add_argument("-ep", "--epochs", default=8, type=int)
    parser.add_argument("-ft", "--fine_tune", default=0, type=int)
    parser.add_argument("-from", "--ft_from", default=0, type=int)
    args = parser.parse_args()

    hypes = Hyperparameters(args.dataset)
    ds_path = '../datasets/' + args.dataset
    tail_threshold = args.tail_threshold
    device_num = args.device_num
    label_embs = args.label_embs
    fine_tune = args.fine_tune
    epochs = args.epochs
    is_train = args.is_train
    ft = (args.fine_tune == 1)
    ft_from = args.ft_from
    graphfile = ds_path+'/label_graph'
    label_graph = nx.read_edgelist(graphfile, create_using=nx.Graph())
    with open(ds_path+'/mlc2seq/heads-'+str(tail_threshold), 'rb') as g:
        heads = pkl.load(g)
    with open(ds_path+'/mlc2seq/train_heads_Y-'+str(tail_threshold), 'rb') as g:
        trn_head_Y = pkl.load(g)
    with open(ds_path+'/mlc2seq/test_heads_Y-'+str(tail_threshold), 'rb') as g:
        test_head_Y = pkl.load(g)
    answer = smat.load_npz(ds_path+'/Y.tst.npz')
    with open(ds_path+'/mlc2seq/tails-'+str(tail_threshold), 'rb') as g:
        tails = pkl.load(g)
    bertReg = BertRegressor(hypes, tails, label_embs, tail_threshold, device_num, epochs, ft, max_seq_len=hypes.max_seq_len)
    trn_X_path = ds_path+'/tail_data/trn_X-' + str(tail_threshold)
    test_X_path = ds_path+'/tail_data/test_X-' + str(tail_threshold)

    if label_embs == 'elmo' or label_embs == 'en':
        label_space = smat.load_npz(ds_path+'/L.elmo.npz')
        label_space = smat.lil_matrix(label_space)
        label_space[:tails[0]] = 9999
        trn_Y_path = ds_path+'/tail_data/trn_elmo_Y-' + str(tail_threshold)
        test_Y_path = ds_path+'/tail_data/test_elmo_Y-' + str(tail_threshold)
        with open(ds_path+'/mlc2seq/train_tails_X-'+str(tail_threshold), 'rb') as g:
            trn_tail_X = pkl.load(g)
        with open(ds_path+'/mlc2seq/train_tails_Y-'+str(tail_threshold), 'rb') as g:
            trn_tail_Y = pkl.load(g)
        trn_X, trn_Y, trn_Y_nums = load_data(trn_X_path, trn_Y_path, trn_tail_X, trn_tail_Y, bertReg, label_space, 'elmo')
        del(trn_tail_X, trn_tail_Y)
        with open(ds_path+'/mlc2seq/test_tails_X-'+str(tail_threshold), 'rb') as g:
            test_tail_X = pkl.load(g)
        with open(ds_path+'/mlc2seq/test_tails_Y-'+str(tail_threshold), 'rb') as g:
            test_tail_Y = pkl.load(g)
        test_X, test_Y, test_Y_nums = load_data(test_X_path, test_Y_path, test_tail_X, test_tail_Y, bertReg, label_space, 'elmo')
        del(test_tail_X, test_tail_Y)
    elif label_embs == 'n2v':
        lv = KV.load_word2vec_format(ds_path+'/label_n2v_embedding', binary=False)
        trn_Y_path = ds_path+'/tail_data/trn_n2v_Y-' + str(tail_threshold)
        test_Y_path = ds_path+'/tail_data/test_n2v_Y-' + str(tail_threshold)
        # the raw tail splits are only loaded in the elmo branch above, so load them here as well
        with open(ds_path+'/mlc2seq/train_tails_X-'+str(tail_threshold), 'rb') as g:
            trn_tail_X = pkl.load(g)
        with open(ds_path+'/mlc2seq/train_tails_Y-'+str(tail_threshold), 'rb') as g:
            trn_tail_Y = pkl.load(g)
        with open(ds_path+'/mlc2seq/test_tails_X-'+str(tail_threshold), 'rb') as g:
            test_tail_X = pkl.load(g)
        with open(ds_path+'/mlc2seq/test_tails_Y-'+str(tail_threshold), 'rb') as g:
            test_tail_Y = pkl.load(g)
        trn_X, trn_Y, trn_Y_nums = load_data(trn_X_path, trn_Y_path, trn_tail_X, trn_tail_Y, bertReg, lv, 'n2v')
        label_space = smat.csr_matrix(np.matrix(trn_Y))
        test_X, test_Y, test_Y_nums = load_data(test_X_path, test_Y_path, test_tail_X, test_tail_Y, bertReg, lv, 'n2v')
    else:
        print('invalid label embedding type')
        exit()
    print('Number of instances:', len(trn_X))
    print('Number of tail labels:', len(tails))

    if is_train:
        if ft:
            print('======================Start Fine-tuning======================')
            model_path = '../save_models/tail_regressor/'+hypes.dataset+'/t-'+str(tail_threshold)+'_ep-' + str(ft_from)+'-elmo/pytorch_model.bin'
            bertReg.train(trn_X, trn_Y, trn_Y_nums, label_space, test_X, test_Y, test_Y_nums, model_path, ft_from)
        else:
            print('======================Start Training======================')
            bertReg.train(trn_X, trn_Y, trn_Y_nums, label_space, test_X, test_Y, test_Y_nums)
        output_dir = '../save_models/tail_regressor/'+hypes.dataset+'/t-'+str(tail_threshold)+'_ep-' + str(epochs + ft_from) +'-'+ label_embs+'/'
        bertReg.save(output_dir)
        accs =  bertReg.evaluate(test_X, test_Y, test_Y_nums, label_space)
    else:
        model_path = '../save_models/tail_regressor/' + hypes.dataset + '/t-' +str(tail_threshold)+'_ep-' + str(ft_from)+'-'+label_embs+'/pytorch_model.bin'
        print('======================Start Testing======================')
        accs =  bertReg.evaluate(test_X, test_Y, test_Y_nums, label_space, model_path, ft_from)
Example #17
def cell_performance_test_2():
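    # Generates a small task dataset, then repeatedly retrains a single-cell model
    # (cell_samples times) and writes per-test-set accuracies to results.json.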
    dir_path = os.path.join(evo_dir, 'dataset_gen')

    hyperparameters = Hyperparameters()
    hyperparameters.parameters['TARGET_FILTER_DIMS'] = 64

    mods = [
        # ObjectModifier.SizeModifier,
        # ObjectModifier.PerspectiveModifier,
        # ObjectModifier.RotationModifier,
        # ObjectModifier.ColorModifier
    ]
    DatasetGenerator.build_task_dataset(20000, (32, 32),
                                        10,
                                        4,
                                        2,
                                        dir_path,
                                        modifiers=mods,
                                        max_depth_of_target=1)
    dataset = DatasetGenerator.get_task_dataset(dir_path)

    cell_samples = 50
    num_cells = 1

    steps_per_epoch = math.ceil(
        len(dataset.train_labels) / hyperparameters.parameters['BATCH_SIZE'])
    total_steps = hyperparameters.parameters[
        'TRAIN_ITERATIONS'] * hyperparameters.parameters[
            'TRAIN_EPOCHS'] * steps_per_epoch

    def test_model():
        metamodel = MetaModel(hyperparameters)
        metamodel.populate_with_nasnet_metacells()

        drop_path_tracker = DropPathTracker(
            hyperparameters.parameters['DROP_PATH_CHANCE'], 0, total_steps)
        first_cell = CellDataHolder(
            3, hyperparameters.parameters['TARGET_FILTER_DIMS'],
            metamodel.cells[0], False, drop_path_tracker, 0.)

        def get_model():
            cell_input = tf.keras.Input(dataset.images_shape)
            cell_output = tf.keras.layers.Conv2D(
                hyperparameters.parameters['TARGET_FILTER_DIMS'], 1, 1,
                'same')(cell_input)
            cell_output = first_cell.build([cell_output, cell_output])
            cell_output = cell_output[0]
            cell_output = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(
                input_tensor=x, axis=[1, 2]))(cell_output)
            cell_output = tf.keras.layers.Dropout(.5)(cell_output)
            cell_output = tf.keras.layers.Dense(10)(cell_output)
            model = tf.keras.Model(inputs=cell_input, outputs=cell_output)
            optimizer = tf.keras.optimizers.Adam(
                learning_rate=hyperparameters.
                parameters['MAXIMUM_LEARNING_RATE'])
            model.compile(optimizer=optimizer,
                          loss=tf.keras.losses.SparseCategoricalCrossentropy(
                              from_logits=True),
                          metrics=['accuracy'])
            return model

        accuracies = []
        for i in range(cell_samples):
            cell_model = get_model()
            cell_model.fit(dataset.train_images,
                           dataset.train_labels,
                           shuffle=True,
                           batch_size=hyperparameters.parameters['BATCH_SIZE'],
                           epochs=1,
                           callbacks=[drop_path_tracker])
            model_accuracies = []
            for test_set_index in range(len(dataset.test_set_images)):
                accuracy = cell_model.evaluate(
                    dataset.test_set_images[test_set_index],
                    dataset.test_set_labels[test_set_index])[-1]
                print(
                    f'{dataset.test_set_names[test_set_index]} test set accuracy: {accuracy}'
                )
                model_accuracies.append(accuracy)
            # accuracy = cell_model.evaluate(dataset.test_images, dataset.test_labels)[-1]
            # accuracies.append(accuracy)
            accuracies.append(model_accuracies)
            tf.keras.backend.clear_session()
            del cell_model

        return accuracies, metamodel.get_embedding()

    all_accuracies = []
    all_embeddings = []

    for x in range(num_cells):
        accuracies, embedding = test_model()
        all_accuracies.append(accuracies)
        all_embeddings.append(embedding)

    all_accuracies = np.array(all_accuracies)
    print(f'all accuracies shape: {all_accuracies.shape}')

    for i in range(len(all_accuracies)):
        print(
            f'embedding: {all_embeddings[i]}, avgs: {np.mean(all_accuracies[i], axis=0)}'
        )

    output_path = os.path.join(dir_path, 'results.json')
    data = {
        'embeddings': all_embeddings,
        'accuracies': all_accuracies.tolist()
    }
    with open(output_path, 'w+') as fl:
        json.dump(data, fl, indent=4)
Example #18
def run_test(dir_name):
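    # Evolutionary cell search: evaluates a base population of random cell models, then
    # repeatedly combines and mutates the best-performing cells to create evolved
    # candidates, persisting embeddings and accuracies to results.json after each model.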
    cell_samples = 16
    base_population = 8
    evolved_population = 24

    mods = [
        ObjectModifier.SizeModifier, ObjectModifier.PerspectiveModifier,
        ObjectModifier.RotationModifier, ObjectModifier.ColorModifier
    ]
    hyperparameters = Hyperparameters()

    dir_path = os.path.join(evo_dir, dir_name)
    results_path = os.path.join(dir_path, 'results.json')

    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    # load dataset, or create a new one if one doesn't exist
    dataset_exists = os.path.exists(dir_path) and 'dataset.npy' in os.listdir(
        dir_path)
    if not dataset_exists:
        print('Generating dataset')
        DatasetGenerator.build_task_dataset(20000, (32, 32),
                                            10,
                                            4,
                                            2,
                                            dir_path,
                                            modifiers=mods,
                                            max_depth_of_target=1)
    dataset = DatasetGenerator.get_task_dataset(dir_path)

    # load previous test results if they exist
    data = {'embeddings': [], 'accuracies': []}
    if os.path.exists(results_path):
        with open(results_path, 'r') as fl:
            data = json.load(fl)

    def save_data():
        with open(results_path, 'w+') as fl:
            json.dump(data, fl, indent=4)

    def get_average_accuracy(model_index: int, cell_index: int):
        return np.mean(data['accuracies'][model_index][cell_index], axis=0)

    existing_population_size = len(data['embeddings'])
    remaining_base_population = 0 if existing_population_size > base_population else base_population - existing_population_size
    remaining_evolved_population = evolved_population if existing_population_size < base_population else evolved_population - (
        existing_population_size - base_population)

    print(
        f'Evaluating {remaining_base_population} base candidates ({base_population - remaining_base_population}/{base_population} done) '
        f'and {remaining_evolved_population} evolved candidates ({evolved_population - remaining_evolved_population}/{evolved_population} done)'
    )

    for i in range(remaining_base_population):
        print(
            f'Evaluating candidate {i + 1} of {remaining_base_population} base candidates'
        )
        metamodel = MetaModel(hyperparameters)
        metamodel.populate_with_nasnet_metacells()
        accuracies = test_model(metamodel, dataset, cell_samples)
        data['embeddings'].append(metamodel.get_embedding())
        data['accuracies'].append(accuracies)
        save_data()

    performances = [performance(x) for x in data['accuracies']]

    def find_best_indexes():
        best_performances = np.full(performances[0].shape,
                                    1.,
                                    dtype=np.float32)
        best_indexes = np.zeros(performances[0].shape, dtype=int)  # the np.int alias is removed in newer NumPy
        for performance_index, x in enumerate(performances):
            for i, entry in enumerate(x):
                if best_performances[i] > entry:
                    best_performances[i] = entry
                    best_indexes[i] = performance_index

        return best_indexes

    for i in range(remaining_evolved_population):
        print(
            f'Evaluating candidate {i + 1} of {remaining_evolved_population} evolved candidates'
        )
        best_indexes = find_best_indexes()
        print(f'best indexes: {best_indexes}')
        combined_embeddings = combine_embeddings(
            data['embeddings'][best_indexes[0]],
            data['embeddings'][best_indexes[1]])
        mutated_embeddings = mutate_cell_from_embedding(combined_embeddings, 0)
        mutated_embeddings = mutate_cell_from_embedding(mutated_embeddings, 1)
        metamodel = MetaModel(hyperparameters)
        metamodel.populate_from_embedding(mutated_embeddings)
        accuracies = test_model(metamodel, dataset, cell_samples)
        data['embeddings'].append(metamodel.get_embedding())
        data['accuracies'].append(accuracies)
        performances.append(performance(accuracies))
        save_data()
Example #19
def main():
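    # Trains or evaluates a BERT+GCN cluster-level classifier: labels are partitioned into
    # k clusters ('kmeans' or 'spec', presumably spectral) using a graph built from the
    # training labels.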
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("-ds",
                        "--dataset",
                        default="AmazonCat-13K",
                        type=str,
                        required=True)
    # parser.add_argument("-t", "--head_threshold", default=10000, type=int)
    parser.add_argument("-k", "--k", default=256, type=int)
    parser.add_argument("-gpu", "--device_num", default='0', type=str)
    parser.add_argument("-train", "--is_train", default=1, type=int)
    parser.add_argument("-ep", "--epochs", default=8, type=int)
    parser.add_argument("-ft", "--fine_tune", default=0, type=int)
    parser.add_argument("-clu", "--clustering", default='spec', type=str)
    parser.add_argument("-from", "--ft_from", default=0, type=int)
    parser.add_argument("-dlf", "--dlf", default=1, type=int)
    args = parser.parse_args()

    ft = (args.fine_tune == 1)
    dlf = (args.dlf == 1)
    ft_from = args.ft_from
    num_clusters = args.k
    hypes = Hyperparameters(args.dataset)
    ds_path = '../datasets/' + args.dataset
    device_num = args.device_num

    with open(ds_path + '/mlc2seq/train_clus_X', 'rb') as g:
        trn_clus_X = pkl.load(g)
    with open(ds_path + '/mlc2seq/train_clus_Y', 'rb') as g:
        trn_clus_Y = pkl.load(g)
    with open(ds_path + '/mlc2seq/test_clus_X', 'rb') as g:
        test_clus_X = pkl.load(g)
    with open(ds_path + '/mlc2seq/test_clus_Y', 'rb') as g:
        test_clus_Y = pkl.load(g)
    answer = smat.load_npz(ds_path + '/Y.tst.npz')

    label_space = load_label(ds_path)

    output_dim = label_space.shape[0]
    gutil = GraphUtil(trn_clus_Y, output_dim)
    adj = gutil.gen_graph()
    # gutil.cal_degree()
    lc = LabelCluster(adj, num_clusters)
    # c_adj: k*k, C: m*k
    print('partitioning labels with ' + args.clustering + ' clustering...')
    clus_path = ds_path + '/clus_data/k-' + str(num_clusters)
    if args.clustering == 'kmeans':
        C, c_adj = lc.kmeans_clustering(trn_clus_Y, label_space, clus_path)
    elif args.clustering == 'spec':
        C, c_adj = lc.spec_clustering(trn_clus_Y, clus_path)
    else:
        C, c_adj = lc.spec_clustering(trn_clus_Y, clus_path)
    bert = BertGCN_ClusterClassifier(hypes,
                                     device_num,
                                     ft,
                                     args.epochs,
                                     label_space,
                                     C,
                                     c_adj,
                                     dlf,
                                     max_seq_len=256)
    trn_X_path = ds_path + '/clus_data/trn_X'
    test_X_path = ds_path + '/clus_data/test_X'
    trn_X = load_data(trn_X_path, trn_clus_X, bert)
    test_X = load_data(test_X_path, test_clus_X, bert)
    print('Number of labels:', output_dim)
    print('Number of trn instances:', len(trn_X))

    if args.is_train:
        if ft:
            print(
                '======================Start Fine-Tuning======================'
            )
            model_path = '/mnt/sdb/yss/xbert_save_models/gcn_dlf/' + hypes.dataset + '/ep-' + str(
                ft_from) + '/k-' + str(num_clusters) + '/pytorch_model.bin'
            # model_path = '../save_models/gcn_dlf/'+hypes.dataset+'/ep-' + str(ft_from) + '/k-' +  str(num_clusters) + '/pytorch_model.bin'
            bert.train(trn_X, trn_clus_Y, test_X, test_clus_Y, model_path,
                       ft_from)
        else:
            print('======================Start Training======================')
            bert.train(trn_X, trn_clus_Y, test_X, test_clus_Y)
        bert.evaluate(test_X, test_clus_Y)
        output_dir = '/mnt/sdb/yss/xbert_save_models/gcn_dlf/' + hypes.dataset + '/ep-' + str(
            args.epochs + ft_from) + '/k-' + str(num_clusters) + '/'
        # output_dir = '../save_models/gcn_dlf/'+hypes.dataset+'/ep-' + str(args.epochs + ft_from) + '/k-' + str(num_clusters) + '/'
        bert.save(output_dir)

    else:
        import datetime
        # model_path = '../save_models/gcn_dlf/'+hypes.dataset+'/ep-' + str(ft_from) + '/k-' +  str(num_clusters) + '/pytorch_model.bin'
        model_path = '/mnt/sdb/yss/xbert_save_models/gcn_dlf/' + hypes.dataset + '/ep-' + str(
            ft_from) + '/k-' + str(num_clusters) + '/pytorch_model.bin'
        print('======================Start Testing======================')
        start = datetime.datetime.now()
        bert.evaluate(test_X, test_clus_Y, model_path)
        end = datetime.datetime.now()
        elapsed = end - start
        print(elapsed.seconds, ":", elapsed.microseconds)
Example #20
# imports assumed from the surrounding project (see Example #22) plus the scheduler used below
import torch
from torch.optim.lr_scheduler import MultiStepLR
from ResnetModels import ResNet50
# Choose device
USE_GPU = True

device = None
if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
import numpy as np
import matplotlib.pyplot as plt
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from Trainer import Trainer
from Hyperparameters import Hyperparameters as Hyp
# Set up scheduler breakpoints
step_down = [1, 5, 10, 20]
# Set up trainer
optim_params = Hyp()
optim_params.register('lr')
optim_params.set_value('lr', 1e-4)
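# Two passes over step_down: the first registers only the learning rate, the second also
# registers weight_decay; a fresh Trainer is configured for each entry in step_down.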
for ind in range(2):
    for step in step_down:
        optim_params = Hyp()
        optim_params.register('lr')
        optim_params.set_value('lr', 1e-4)
        if ind == 1:
            optim_params.register('weight_decay')
            optim_params.set_value('weight_decay', 3e-2)
        tr = Trainer(ResNet50, 'data', device, batch_size=128)
        tr.set_hyperparameters(optim_params)
        tr.set_criterion(CrossEntropyLoss)
        tr.set_optimizer(Adam)
        tr.set_scheduler(MultiStepLR)
Example #21
hidden = Neural_network_layer(8000, Activation_function.sigmoid, Dropout.drop_activation,
                              frequency=[0], dropout=0.5, dropout_decay=[0.2], batch_size=batch_size)

# hidden2 = Neural_network_layer(8000, Activation_function.sigmoid, Dropout.drop_activation,
#                                frequency=100, dropout=0.5, dropout_decay=0.2, batch_size=batch_size)

output = Neural_network_layer(10, Activation_function.softmax, Dropout.drop_none, 0, 0, batch_size=batch_size, frequency=0)

layers = [input, hidden,
          # hidden2,
          output]

H.layers = layers

H.set_learning_parameters(epochs=100,
                          learning_rate=0.1, use_learning_rate_decay=False, learning_rate_decay=0.01,
                          use_momentum=True, momentum=0.5, transient_phase=1500,
                          use_momentum_increase=True, momentum_increase=0.001, use_bias=True)

H.set_standard_regularization_parameters(
    use_early_stopping=True,
    use_L2=True, L2_penalty=0.0001,
    use_weight_decay=False, weight_decay=0.05, weight_decay_decay=0.01)

H.set_stopping_and_ensemble_parameter(use_ensemble=False, ensemble_count=15,
                                      safe_weights_threshold=0.011, epochs_force_reinitilization=260)

H.set_initial_parameters()

nn = Neural_network(result_dir, data_X, data_y, data_t, data_X_val, data_y_val, data_t_val, X_test,
                    layers=layers,
Example #22
import torch
import torch.nn as nn
import torch.nn.functional as F
from ResnetModels import ResNet, ResNet50
# Choose device
USE_GPU = True

device = None
if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
import numpy as np
import matplotlib.pyplot as plt
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from Trainer import Trainer
from Hyperparameters import Hyperparameters as Hyp
# Set up trainer
tr = Trainer(ResNet50, 'data', device, batch_size=128)
optim_params = Hyp()
optim_params.register('lr')
optim_params.set_value('lr', 1e-5)
optim_params.register('weight_decay')
optim_params.set_range('weight_decay', -3, -1)
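# set_range presumably registers a search interval for weight_decay (likely exponents,
# i.e. roughly 1e-3 to 1e-1) that hyp_opt below explores, in contrast to the fixed lr value.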
tr.set_hyperparameters(optim_params)
tr.set_criterion(CrossEntropyLoss)
tr.set_optimizer(Adam)
tr.prime_optimizer()
tr.prime_model(pretrained=True)
tr.hyp_opt(epochs=7, iters=25)
#tr.train(epochs=1, save_every=20, update_every=1)
Example #23
class MetaModel(SerialData):
    def __init__(self, hyperparameters: Hyperparameters = Hyperparameters()):
        super().__init__()
        self.cells: List[MetaCell] = []  # [NORMAL_CELL, REDUCTION_CELL]
        self.hyperparameters = hyperparameters

        self.model_name = 'evo_' + str(time.time())
        self.parent_model_name = ''
        self.metrics = Metrics()
        self.fitness = 0.

        self.keras_model: tf.keras.Model = None
        self.keras_model_data: ModelDataHolder = None

    def container_name(self):
        return self.model_name + '_container'

    def populate_with_nasnet_metacells(self):
        groups_in_block = 5
        ops_in_group = 2
        group_inputs = 2

        def get_cell():
            cell = MetaCell(group_inputs)
            cell.groups = [MetaGroup() for _ in range(groups_in_block)]
            for i in range(groups_in_block):
                cell.groups[i].operations = [
                    MetaOperation(i + group_inputs)
                    for _ in range(ops_in_group)
                ]  # +2 because 2 inputs for cell, range(2) because pairwise groups
                for j in range(ops_in_group):
                    cell.groups[i].operations[j].actual_attachment = min(
                        j, group_inputs - 1)
            return cell

        def randomize_cell(cell: MetaCell):
            for group_ind, group in enumerate(cell.groups):
                # do hidden state randomization for all but first groups
                if group_ind > 0:
                    for op in group.operations:
                        op.actual_attachment = int(np.random.random() *
                                                   op.attachment_index)

                # do op randomization for all groups
                for op in group.operations:
                    op.operation_type = int(np.random.random() *
                                            OperationType.SEP_1X7_7X1)

        normal_cell = get_cell()
        reduction_cell = get_cell()

        randomize_cell(normal_cell)
        randomize_cell(reduction_cell)

        self.cells.append(normal_cell)
        self.cells.append(reduction_cell)

    def mutate(self):
        cell_index, group_index, item_index, mutation_type, mutation_subtype = self.select_mutation(
        )
        self.apply_mutation(cell_index, group_index, item_index, mutation_type,
                            mutation_subtype)

    def select_mutation(self):
        cell_index = int(np.random.random() * len(self.cells))
        select_block = self.cells[cell_index]
        group_index = int(np.random.random() * len(select_block.groups))
        select_group = select_block.groups[group_index]
        item_index = int(np.random.random() * len(select_group.operations))
        mutation_type = np.random.random()
        mutation_subtype = np.random.random()

        return cell_index, group_index, item_index, mutation_type, mutation_subtype

    def apply_mutation(self, cell_index, group_index, item_index,
                       mutation_type, mutation_subtype):
        other_mutation_threshold = (
            (1. - self.hyperparameters.parameters['IDENTITY_THRESHOLD']) /
            2.) + self.hyperparameters.parameters['IDENTITY_THRESHOLD']
        select_block = self.cells[cell_index]
        select_group = select_block.groups[group_index]
        select_item = select_group.operations[item_index]

        mutation_string = f'mutating cell {cell_index}, group {group_index}, item {item_index}: '
        if mutation_type < self.hyperparameters.parameters[
                'IDENTITY_THRESHOLD']:
            # identity mutation
            print(mutation_string + 'identity mutation')
            return

        if self.hyperparameters.parameters[
                'IDENTITY_THRESHOLD'] < mutation_type < other_mutation_threshold:
            # hidden state mutation = change inputs

            # don't try to change the state of the first group since it needs to point to the first two inputs of the block
            if group_index != 0:
                previous_attachment = select_item.actual_attachment
                new_attachment = previous_attachment
                # ensure that the mutation doesn't result in the same attachment as before
                while new_attachment == previous_attachment:
                    new_attachment = int(
                        mutation_subtype *
                        select_item.attachment_index)  #TODO: EXCLUSIVE RANDOM

                if self.keras_model_data is not None:
                    self.keras_model_data.hidden_state_mutation(
                        self.hyperparameters, cell_index, group_index,
                        item_index, new_attachment, select_item.operation_type)
                select_item.actual_attachment = new_attachment
                print(
                    mutation_string +
                    f'hidden state mutation from {previous_attachment} to {select_item.actual_attachment}'
                )
            else:
                print(mutation_string + 'skipping state mutation for group 0')

        else:
            # operation mutation
            previous_op = select_item.operation_type
            select_item.operation_type = int(mutation_subtype *
                                             (OperationType.SEP_1X7_7X1 + 1))
            if previous_op != select_item.operation_type and self.keras_model_data is not None:
                self.keras_model_data.operation_mutation(
                    self.hyperparameters, cell_index, group_index, item_index,
                    select_item.operation_type)
            print(
                mutation_string +
                f'operation type mutation from {previous_op} to {select_item.operation_type}'
            )

        initial_layer_shape = self.keras_model.layers[0].get_input_shape_at(
            0)[1:]

        self.keras_model = None
        self.build_model(initial_layer_shape, False)

    def serialize(self) -> dict:
        # return {
        #     'blocks': [x.serialize() for x in self.cells],
        #     'metrics': self.metrics.serialize(),
        #     'hyperparameters': self.hyperparameters.serialize(),
        #     'model_name': self.model_name,
        #     'parent_model_name': self.parent_model_name
        # }
        return {
            'embedding': self.get_embedding(),
            'hyperparameters': self.hyperparameters.serialize(),
            'model_name': self.model_name,
            'metrics': self.metrics.serialize(),
            'parent_model_name': self.parent_model_name
        }

    def deserialize(self, obj: dict) -> None:
        # for block in obj['blocks']:
        #     item = MetaCell()
        #     item.deserialize(block)
        #     self.cells.append(item)
        self.populate_from_embedding(obj['embedding'])
        self.model_name = obj['model_name']
        self.metrics = Metrics()
        self.metrics.deserialize(obj['metrics'])
        self.hyperparameters = Hyperparameters()
        self.hyperparameters.deserialize(obj['hyperparameters'])
        if 'parent_model_name' in obj:
            self.parent_model_name = obj['parent_model_name']
        else:
            self.parent_model_name = ''

    def build_model(self, input_shape, use_new_weights: bool = True) -> None:
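        # Builds and compiles a Keras model from the cell definitions if none is cached;
        # use_new_weights=True forces a fresh ModelDataHolder (and therefore fresh weights).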
        if self.keras_model is None:
            print('Creating new keras model')
            build_time = time.time()
            if self.keras_model_data is None or use_new_weights:
                self.keras_model_data = ModelDataHolder(self)
            model_input = tf.keras.Input(input_shape)
            self.keras_model = self.keras_model_data.build(model_input)
            build_time = time.time() - build_time
            # optimizer = tf.keras.optimizers.Adam(self.hyperparameters.parameters['MAXIMUM_LEARNING_RATE'])
            optimizer = tf.keras.optimizers.SGD(
                self.hyperparameters.parameters['MAXIMUM_LEARNING_RATE'])

            compile_time = time.time()
            self.keras_model.compile(
                optimizer=optimizer,
                loss=tf.keras.losses.SparseCategoricalCrossentropy(
                    from_logits=True),
                metrics=['accuracy'])
            compile_time = time.time() - compile_time

            self.metrics.metrics['build_time'] = build_time
            self.metrics.metrics['compile_time'] = compile_time
        else:
            print('reusing previous keras model')

    def evaluate(self,
                 dataset: ImageDataset,
                 save_interval: int = 0,
                 save_path: str = None) -> None:
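        # Runs the remaining training iterations (TRAIN_EPOCHS epochs each), appending
        # accuracy and timing metrics after every iteration and optionally checkpointing
        # every save_interval iterations.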
        batch_size = self.hyperparameters.parameters['BATCH_SIZE']
        min_lr = self.hyperparameters.parameters['MINIMUM_LEARNING_RATE']
        max_lr = self.hyperparameters.parameters['MAXIMUM_LEARNING_RATE']

        sgdr = SGDR(min_lr, max_lr, batch_size, len(dataset.train_labels),
                    self.hyperparameters.parameters['SGDR_EPOCHS_PER_RESTART'],
                    self.hyperparameters.parameters['SGDR_LR_DECAY'],
                    self.hyperparameters.parameters['SGDR_PERIOD_DECAY'])

        completed_iterations = len(self.metrics.metrics['accuracy'])
        completed_epochs = completed_iterations * self.hyperparameters.parameters[
            'TRAIN_EPOCHS']
        if completed_epochs != 0:
            sgdr.init_after_epochs(completed_epochs)

        callbacks = [self.keras_model_data.drop_path_tracker]
        if self.hyperparameters.parameters['USE_SGDR']:
            callbacks.append(sgdr)

        for iteration in range(
                completed_iterations,
                self.hyperparameters.parameters['TRAIN_ITERATIONS']):
            print(f'Starting training iteration {iteration}')
            train_time = time.time()
            for epoch_num in range(
                    int(self.hyperparameters.parameters['TRAIN_EPOCHS'])):
                self.keras_model.fit(dataset.train_images,
                                     dataset.train_labels,
                                     shuffle=True,
                                     batch_size=batch_size,
                                     epochs=1,
                                     callbacks=callbacks)
            train_time = time.time() - train_time

            inference_time = time.time()
            evaluated_metrics = self.keras_model.evaluate(
                dataset.test_images, dataset.test_labels)
            inference_time = time.time() - inference_time

            self.metrics.metrics['accuracy'].append(
                float(evaluated_metrics[-1]))
            self.metrics.metrics['average_train_time'].append(
                train_time /
                float(self.hyperparameters.parameters['TRAIN_EPOCHS'] *
                      len(dataset.train_labels)))
            self.metrics.metrics['average_inference_time'].append(
                inference_time / float(len(dataset.test_images)))

            if save_interval != 0 and iteration % save_interval == 0 and save_path is not None:
                print('Checkpoint. Saving model...')
                self.save_metadata(save_path)
                self.save_model(save_path)

    def save_metadata(self, dir_path: str = model_save_dir):
        dir_name = os.path.join(dir_path, self.model_name)
        if not os.path.exists(dir_name):
            os.mkdir(dir_name)
        SerialData.write_serial_data_to_json(self, dir_name, self.model_name)

    def plot_model(self, dir_path):
        dir_name = os.path.join(dir_path, self.model_name)
        if not os.path.exists(dir_name):
            os.mkdir(dir_name)
        had_keras_model: bool = self.keras_model is not None
        if not had_keras_model:
            self.build_model([16, 16, 3])

        tf.keras.utils.plot_model(self.keras_model,
                                  os.path.join(dir_name,
                                               self.model_name + '.png'),
                                  expand_nested=True,
                                  show_layer_names=False,
                                  show_shapes=True)

        if not had_keras_model:
            self.clear_model()

    def save_model(self, dir_path: str = model_save_dir):
        if self.keras_model is not None:
            custom_objects = {
                'SeperableConvolutionOperation': SeperableConvolutionOperation,
                'AveragePoolingOperation': AveragePoolingOperation,
                'MaxPoolingOperation': MaxPoolingOperation,
                'DoublySeperableConvoutionOperation':
                DoublySeperableConvoutionOperation,
                'DimensionalityReductionOperation':
                DimensionalityChangeOperation,
                'IdentityOperation': IdentityOperation,
                'DenseOperation': DenseOperation,
                'Relu6Layer': Relu6Layer
            }

            print(f'saving graph for {self.model_name}')
            dir_name = os.path.join(dir_path, self.model_name)
            if not os.path.exists(dir_name):
                os.mkdir(dir_name)
            save_time = time.time()

            ModelUtilities.save_keras_model(self.keras_model, dir_name,
                                            self.model_name)
            save_time = time.time() - save_time
            self.metrics.metrics['save_time'] = save_time
            print(
                f'finished saving graph for {self.model_name} after {save_time} seconds'
            )

    def clear_model(self):
        if self.keras_model is not None:
            del self.keras_model
            self.keras_model = None
        self.keras_model_data = None
        print(f'finished clearing model for {self.model_name}')

    def produce_child(self) -> MetaModel:
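        # The child deep-copies the cell structure and takes ownership of this model's
        # built Keras model; the parent's references to it are cleared.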
        result: MetaModel = MetaModel(self.hyperparameters)
        result.cells = copy.deepcopy(self.cells)

        result.keras_model = self.keras_model
        result.keras_model_data = self.keras_model_data

        self.keras_model = None
        self.keras_model_data = None

        return result

    def load_model(self, dir_path: str = model_save_dir) -> bool:
        dir_name = os.path.join(dir_path, self.model_name)

        contained_files = os.listdir(dir_name)
        contains_keras_model = False

        for fl in contained_files:
            if len(fl) > 3 and fl[-3:] == '.h5':
                contains_keras_model = True

        if contains_keras_model:
            print(f'loading model for {self.model_name}')
            load_time = time.time()

            custom_objects = {
                'SeperableConvolutionOperation': SeperableConvolutionOperation,
                'AveragePoolingOperation': AveragePoolingOperation,
                'MaxPoolingOperation': MaxPoolingOperation,
                'DoublySeperableConvoutionOperation':
                DoublySeperableConvoutionOperation,
                'DimensionalityChangeOperation': DimensionalityChangeOperation,
                'IdentityReductionOperation': IdentityReductionOperation,
                'IdentityOperation': IdentityOperation,
                'DenseOperation': DenseOperation,
                'Relu6Layer': Relu6Layer,
                'DropPathOperation': DropPathOperation
            }
            self.keras_model = ModelUtilities.load_keras_model(
                dir_name, self.model_name, custom_objects)
            # print(self.keras_model.summary(line_length=200))
            self.keras_model_data = ModelDataHolder(self, self.keras_model)
            load_time = time.time() - load_time
            print(
                f'finished loading model for {self.model_name} in {load_time} seconds'
            )
            return True
        else:
            print(f'could not find keras model for {self.model_name}')
            return False

    @staticmethod
    def load(dir_path: str, name: str, load_graph: bool = False) -> MetaModel:
        # print(f'loading model, load_graph = {load_graph}')
        dir_name = os.path.join(dir_path, name)
        if not os.path.exists(dir_name):
            print('Model does not exist at specified location')
            return MetaModel()

        serial_data = SerialData.load_serial_data_from_json(dir_name, name)
        result = MetaModel()
        result.deserialize(serial_data)
        if load_graph:
            result.load_model(dir_path)

        return result

    def generate_graph(self, dir_path: str):
        print(f'Generating graph for {self.model_name}')
        graph = Digraph(comment='Model Architecture', format='png')

        for cell_index, cell in enumerate(self.cells):
            graph.node(f'{cell_index}_in', f'Cell Input {cell_index}')
            graph.node(f'{cell_index}_0', 'Previous Layer')
            graph.node(f'{cell_index}_1', 'Residual')
            graph.edge(f'{cell_index}_in', f'{cell_index}_0')
            graph.edge(f'{cell_index}_in', f'{cell_index}_1')
            for group_index, group in enumerate(cell.groups):
                graph.node(f'{cell_index}_{group_index + 2}',
                           f'Group Concat {cell_index}_{group_index}')
                for item_index, item in enumerate(group.operations):
                    graph.node(
                        f'{cell_index}_{group_index}_{item_index}',
                        f'{OperationType.lookup_string(item.operation_type)}')
                    graph.edge(f'{cell_index}_{item.actual_attachment}',
                               f'{cell_index}_{group_index}_{item_index}')
                    graph.edge(f'{cell_index}_{group_index}_{item_index}',
                               f'{cell_index}_{group_index + 2}')

            unused_nodes = cell.get_unused_group_indexes()
            graph.node(f'{cell_index}_out', 'Cell Output')
            for node in unused_nodes:
                graph.edge(f'{cell_index}_{node}', f'{cell_index}_out')

        graph.render(os.path.join(dir_path, self.model_name, 'graph.png'))

    def get_flops(self, dataset: ImageDataset):
        if self.keras_model is None:
            return 0

        # session = tf.compat.v1.get_default_session()
        session = tf.compat.v1.keras.backend.get_session()

        with session.as_default():
            input_img = tf.ones((1, ) + dataset.images_shape, dtype=tf.float32)
            output_image = self.keras_model(input_img)

            run_meta = tf.compat.v1.RunMetadata()

            _ = session.run(
                output_image,
                options=tf.compat.v1.RunOptions(
                    trace_level=tf.compat.v1.RunOptions.FULL_TRACE),
                run_metadata=run_meta,
                # feed_dict={input_img:np.reshape(dataset.test_images[0], (1,)+dataset.images_shape)}
            )

            opts = tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()
            # opts['output'] = 'none'
            flops = tf.compat.v1.profiler.profile(run_meta=run_meta,
                                                  cmd='op',
                                                  options=opts)

            return flops.total_float_ops

        # run_meta = tf.compat.v1.RunMetadata()
        # opts = tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()
        # flops = tf.compat.v1.profiler.profile(tf.compat.v1.keras.backend.get_session().graph, run_meta=run_meta, cmd='op', options=opts)
        # return flops.total_float_ops

    def get_embedding(self):
        embedding = []
        embedding.append(self.hyperparameters.parameters['NORMAL_CELL_N'])
        embedding.append(self.hyperparameters.parameters['CELL_LAYERS'])

        for cell in self.cells:
            for group in cell.groups:
                for op in group.operations:
                    embedding.append(op.operation_type)
                    embedding.append(op.actual_attachment)

        return embedding
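
    # Embedding layout (produced here and consumed by populate_from_embedding
    # below): [NORMAL_CELL_N, CELL_LAYERS] followed by one
    # (operation_type, actual_attachment) pair per operation, over
    # 2 cells x 5 groups x 2 operations = 40 values, i.e. 42 entries in total
    # (compare get_identity_embedding below).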

    def populate_from_embedding(self, embedding):
        print('Populating model from embedding')
        num_cells = 2
        num_groups_per_cell = 5
        num_ops_per_group = 2
        num_cell_inputs = 2

        dup_embedding = embedding.copy()

        self.hyperparameters.parameters['NORMAL_CELL_N'] = dup_embedding[0]
        del dup_embedding[0]
        self.hyperparameters.parameters['CELL_LAYERS'] = dup_embedding[0]
        del dup_embedding[0]

        for cell_ind in range(num_cells):
            self.cells.append(MetaCell(num_cell_inputs))
            for group_ind in range(num_groups_per_cell):
                self.cells[cell_ind].groups.append(MetaGroup())
                for op_ind in range(num_ops_per_group):
                    self.cells[cell_ind].groups[group_ind].operations.append(
                        MetaOperation(num_cell_inputs + group_ind))
                    ref_op = self.cells[cell_ind].groups[group_ind].operations[
                        op_ind]
                    ref_op.operation_type = dup_embedding[0]
                    ref_op.actual_attachment = dup_embedding[1]
                    del dup_embedding[0]
                    del dup_embedding[0]

    def get_confusion_matrix(self, dataset):
        predictions = self.keras_model.predict(dataset.test_images,
                                               batch_size=32)

        predictions = ModelUtilities.softmax(predictions)
        predictions = np.argmax(predictions, axis=1)

        matrix = tf.math.confusion_matrix(dataset.test_labels,
                                          predictions,
                                          num_classes=10)

        matrix_val = None

        if not tf.executing_eagerly():
            with tf.compat.v1.Session().as_default():
                matrix_val = matrix.eval()
        else:
            matrix_val = matrix.numpy()

        return matrix_val

    def activation_viewer(self) -> tf.keras.Model:
        if self.keras_model is None or self.keras_model_data is None:
            return None

        parser = ModelParsingHelper()

        first_cell_reduce = self.keras_model.get_layer(
            parser.get_next_name('concatenate')).get_output_at(0)
        # first_cell_reduce = tf.keras.layers.Softmax()(first_cell_reduce)

        outputs = [first_cell_reduce]
        outputs.extend(self.keras_model.outputs)
        output_model = tf.keras.Model(inputs=self.keras_model.inputs,
                                      outputs=outputs)

        return output_model

    def process_stuff(self):
        return [x.process_stuff() for x in self.cells]

    @staticmethod
    def get_nasnet_embedding() -> List:
        return [
            5,
            3,
            OperationType.SEP_3X3,
            0,  # NORMAL CELL
            OperationType.IDENTITY,
            0,
            OperationType.SEP_3X3,
            1,
            OperationType.SEP_5X5,
            0,
            OperationType.AVG_3X3,
            0,
            OperationType.IDENTITY,
            1,
            OperationType.AVG_3X3,
            1,
            OperationType.AVG_3X3,
            1,
            OperationType.SEP_5X5,
            1,
            OperationType.SEP_3X3,
            1,
            OperationType.SEP_7X7,
            1,  # REDUCTION CELL
            OperationType.SEP_5X5,
            0,
            OperationType.MAX_3X3,
            0,
            OperationType.SEP_7X7,
            1,
            OperationType.AVG_3X3,
            0,
            OperationType.SEP_5X5,
            1,
            OperationType.MAX_3X3,
            0,
            OperationType.SEP_3X3,
            2,
            OperationType.AVG_3X3,
            2,
            OperationType.IDENTITY,
            3
        ]

    @staticmethod
    def get_identity_embedding() -> List:
        embedding = [5, 3]
        embedding.extend([0] * 40)
        return embedding

    @staticmethod
    def get_s1_embedding() -> List:
        return [
            5, 3, 6, 0, 0, 1, 0, 0, 6, 1, 4, 1, 3, 2, 5, 4, 5, 3, 2, 1, 0, 1,
            3, 0, 0, 1, 4, 2, 1, 0, 1, 1, 6, 3, 3, 4, 5, 0, 5, 3, 2, 4
        ]

    @staticmethod
    def get_m1_sep7_embedding() -> List:
        embedding = [5, 3]
        embedding.extend([OperationType.SEP_7X7, 0] * 20)
        return embedding

    @staticmethod
    def get_m1_sep3_embedding() -> List:
        embedding = [5, 3]
        embedding.extend([OperationType.SEP_3X3, 0] * 20)
        return embedding

    @staticmethod
    def get_m1_sep3_serial_embedding() -> List:
        embedding = [5, 3]
        for j in range(2):
            embedding.extend(
                [OperationType.SEP_3X3, 0, OperationType.SEP_3X3, 1])
            for i in range(1, 5):
                embedding.extend([
                    OperationType.SEP_3X3, i + 1, OperationType.SEP_3X3, i + 1
                ])
        return embedding
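

# A usage sketch for the embedding helpers above (added for illustration; it
# is not part of the original example). It assumes MetaModel() can be
# constructed without arguments, as MetaModel.load() does, and that a fresh
# instance starts with no cells.
def _embedding_round_trip_sketch() -> bool:
    model = MetaModel()
    model.populate_from_embedding(MetaModel.get_nasnet_embedding())
    # get_embedding() walks the same cell/group/operation order that
    # populate_from_embedding fills, so the round trip should reproduce the
    # original 42-entry list.
    return model.get_embedding() == MetaModel.get_nasnet_embedding()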
Ejemplo n.º 24
0
def main():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("-ds",
                        "--dataset",
                        default="AmazonCat-13K",
                        type=str,
                        required=True)
    parser.add_argument("-le", "--label_embs", default='elmo', type=str)
    parser.add_argument("-gpu", "--device_num", default='1', type=str)
    parser.add_argument("-train", "--is_train", default=1, type=int)
    parser.add_argument("-ep", "--epochs", default=8, type=int)
    parser.add_argument("-ft", "--fine_tune", default=0, type=int)
    parser.add_argument("-from", "--ft_from", default=0, type=int)
    args = parser.parse_args()

    hypes = Hyperparameters(args.dataset)
    ds_path = '../datasets/' + args.dataset
    device_num = args.device_num
    label_embs = args.label_embs
    fine_tune = args.fine_tune
    epochs = args.epochs
    is_train = args.is_train
    ft = (args.fine_tune == 1)
    ft_from = args.ft_from
    trn_X_raw, trn_Y = load_raw(ds_path, 1)
    test_X_raw, test_Y = load_raw(ds_path, 0)

    bertReg = BertRegressor(hypes,
                            label_embs,
                            device_num,
                            epochs,
                            ft,
                            max_seq_len=256)
    trn_X_path = ds_path + '/trn_X'
    test_X_path = ds_path + '/test_X'
    if label_embs in ('elmo', 'en'):
        label_space = smat.load_npz(ds_path + '/L.elmo.npz')
        trn_Y_path = ds_path + '/trn_elmo'
        test_Y_path = ds_path + '/test_elmo_Y'
        trn_X, trn_Y, trn_Y_nums = load_data(trn_X_raw, trn_Y, trn_X_path,
                                             trn_Y_path, bertReg, label_space,
                                             'elmo')
        test_X, test_Y, test_Y_nums = load_data(test_X_raw, test_Y,
                                                test_X_path, test_Y_path,
                                                bertReg, label_space, 'elmo')

    else:
        print('invalid label embedding type')
        exit()
    # print('Number of instances:', len(trn_X))
    # print('Number of labels:', len(trn_Y_nums))

    knn_path = ds_path + '/nbrs'
    if os.path.isfile(knn_path):
        with open(knn_path, 'rb') as f:
            nbrs = pkl.load(f)
    else:
        with open(knn_path, 'wb') as f:
            nbrs = NN(n_neighbors=5, algorithm='auto').fit(label_space)
            pkl.dump(nbrs, f)

    if is_train:
        if ft:
            print(
                '======================Start Fine-tuning======================'
            )
            model_path = '../save_models/regressor/' + hypes.dataset + '/ep-' + str(
                ft_from) + '-elmo/pytorch_model.bin'
            bertReg.train(trn_X, trn_Y, trn_Y_nums, label_space, nbrs,
                          model_path, ft_from)
        else:
            print('======================Start Training======================')
            bertReg.train(trn_X, trn_Y, trn_Y_nums, label_space, nbrs)
        output_dir = '../save_models/regressor/' + hypes.dataset + '/ep-' + str(
            epochs + ft_from) + '-' + label_embs + '/'
        bertReg.save(output_dir)
    else:
        model_path = '../save_models/regressor/' + hypes.dataset + '/ep-' + str(
            ft_from) + '/pytorch_model.bin'
        print('======================Start Testing======================')
        accs = bertReg.evaluate(test_X, test_Y, test_Y_nums, label_space, nbrs,
                                model_path)
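

# Entry-point guard (added for completeness; the listing shows main() without
# one):
if __name__ == '__main__':
    main()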
Ejemplo n.º 25
0
import read_data
import model
import utils
from Hyperparameters import Hyperparameters

import tensorflow.compat.v1 as tf
import numpy as np

hyperparameters = Hyperparameters()

with tf.Graph().as_default():
    # =========================================================================================================
    # BUILD MODEL
    # =========================================================================================================
    train_operation = model.model_architecture(hyperparameters)

    # =========================================================================================================
    # LOAD DATA
    # =========================================================================================================
    input_train, train_label, input_test, test_label = read_data.load_data(
        hyperparameters.num_points)
    scaled_laplacian_train, scaled_laplacian_test = read_data.prepare_data(
        input_train, input_test, hyperparameters.num_neighhbors,
        hyperparameters.num_points)

    # =========================================================================================================
    # TRAIN MODEL
    # =========================================================================================================
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
Ejemplo n.º 26
0
def test2():
    # More complex integration test
    defaults = {
        'a': 'a1',
        'b': {
            'c': 'c1'
        },
        'd': 'd1',
        'e': {
            'f': 'f1',
            'g': 'g1',
            'h': 'h1',
        }
    }
    non_hash = {
        'i': 'i1',
        'j': {
            'k': 'k1',
            'l': 'l1',
        }
    }

    inputs = {
        'd': 'd2',  # Simple override
        'e': {
            'g': 'g2'
        },  # Complex hash override, want to only override 'g',
        # but without affecting 'f' or 'h'.
        'j': {
            'l': 'l2'
        }  # Complex non-hash override, want to only override
        # 'l', but without affecting 'k'
    }

    correct_integration = {
        'a': 'a1',
        'b': {
            'c': 'c1'
        },
        'd': 'd2',
        'e': {
            'f': 'f1',
            'g': 'g2',
            'h': 'h1',
        },
        'i': 'i1',
        'j': {
            'k': 'k1',
            'l': 'l2',
        }
    }

    hps = Hyperparameters(inputs, defaults, non_hash)

    print_test_result(
        hps._integrated_hps, correct_integration,
        'test 2a: complex hp integration, partial dict overrides.')

    # These will be wrong or throw access errors if implementation is wrong
    print_test_result(hps['e:g'], 'g2',
                      'test 2b: recursive __getattr__')  # should be 'g2'
    print_test_result(hps['j:k'], 'k1',
                      'test 2c: recursive __getattr__')  # should be 'k1'

    # Test partitioning for hashing
    # should include d and g but NOT l
    print_test_result(hps.hps_to_hash, {
        'd': 'd2',
        'e': {
            'g': 'g2'
        }
    }, 'test 2d: hp partitioning for hashing')
Ejemplo n.º 27
0
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from torch.optim.lr_scheduler import MultiStepLR, ReduceLROnPlateau
from Trainer import Trainer
from Hyperparameters import Hyperparameters as Hyp
# ResNet50 and write_history are used below, but their imports are not shown
# in the snippet; they presumably come from project modules alongside Trainer.

# Choose device
USE_GPU = True

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Set up trainer
tr = Trainer(ResNet50, 'data', device, batch_size=128)
optim_params = Hyp()
optim_params.register('lr')
optim_params.set_value('lr', 1e-4)
tr.set_hyperparameters(optim_params)
tr.set_criterion(CrossEntropyLoss)
tr.set_optimizer(Adam)
tr.set_scheduler(MultiStepLR)
tr.prime_optimizer()
tr.prime_scheduler(milestones=[5], gamma=0.1)
tr.prime_model(pretrained=True)
tr.train(epochs=30, save_every=10)
hist = tr.history
write_history('raw', hist)
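
# matplotlib is imported above but never used in the snippet. A minimal
# plotting sketch (an addition, not part of the original); it assumes
# tr.history is a dict mapping metric names to per-epoch value lists, which is
# an assumption about Trainer rather than anything the snippet documents.
if isinstance(hist, dict):
    for metric_name, values in hist.items():
        plt.plot(values, label=metric_name)
    plt.xlabel('epoch')
    plt.legend()
    plt.savefig('training_history.png')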