def train(model):
    """Train *model* on the BioELMo text data generators.

    Writes a per-epoch checkpoint file and the final weights to disk.
    Relies on module-level BATCH_SIZE, EPOCHS and the DataGenerator,
    CyclicLR and roc_callback helpers.
    """
    train_gen = DataGenerator("train", 169728, BATCH_SIZE, True)
    val_gen = DataGenerator("val", 42432, BATCH_SIZE, False)
    test_gen = DataGenerator("test", 48832, 64, False)

    filepath = "BioElmoTextModel-{epoch:02d}-{val_loss:.2f}.hdf5"
    modelckp = ModelCheckpoint(filepath,
                               monitor='val_loss',
                               verbose=1,
                               save_best_only=False,
                               mode='min')
    clr = CyclicLR(base_lr=0.0001, max_lr=0.0006, step_size=2000.)
    # BUG FIX: the original passed the builtin `min` function; the Keras
    # `mode` argument must be the string 'min'.
    es = EarlyStopping(monitor="val_loss", mode='min', verbose=1)
    # BUG FIX: the original list contained an undefined name `checkpoint`;
    # the checkpoint callback created above is `modelckp`.
    callbacks_list = [
        modelckp, clr, es,
        roc_callback(test_gen, np.array(test_gen.labels))
    ]
    # BUG FIX: fit_generator takes `callbacks=`; the original passed both an
    # invalid `callbacks_list=` kwarg and an undefined name `call_list`.
    model.fit_generator(train_gen,
                        validation_data=val_gen,
                        use_multiprocessing=False,
                        verbose=1,
                        epochs=EPOCHS,
                        callbacks=callbacks_list,
                        workers=4)

    model.save_weights("BioElmoTextModel.h5")
class DataProviderService:
    """In-memory provider for generated candidate records.

    Candidates are plain dicts produced by DataGenerator at construction
    time; all lookups match on the stringified 'id' field.
    """

    def __init__(self, nr_of_items):
        self.data_generator = DataGenerator()
        self.CANDIDATES = self.data_generator.generate_candidates(nr_of_items)

    def get_candidates(self):
        # Full in-memory candidate list.
        return self.CANDIDATES

    def get_candidate(self, id):
        # First candidate whose stringified id matches, or None.
        if not id:
            return None
        return next(
            (cand for cand in self.CANDIDATES if str(cand["id"]) == id),
            None)

    def get_random_candidates(self, nr_of_candidates):
        # Fresh batch, independent of the stored CANDIDATES list.
        return self.data_generator.generate_candidates(nr_of_candidates)

    def update_name(self, id, new_name):
        # Rename the first matching candidate; returns 1 if changed, else 0.
        for cand in self.CANDIDATES:
            if str(cand["id"]) == id:
                cand["first_name"] = new_name
                return 1
        return 0

    def delete_candidate(self, id):
        # Remove the first matching candidate; True on success.
        victim = next(
            (cand for cand in self.CANDIDATES if str(cand["id"]) == id),
            None)
        if victim is None:
            return False
        self.CANDIDATES.remove(victim)
        return True

    def add_candidate(self, first_name, last_name):
        fresh = Candidate(first_name, last_name, [])
        self.CANDIDATES.append(fresh.serialize())
        return str(fresh.id)

    def add_project(self, project_name, project_description):
        fresh_project = Project(project_name, datetime.datetime.utcnow(),
                                datetime.datetime.utcnow(),
                                project_description)
        # NOTE(review): always attached to the first candidate's first
        # experience entry — confirm this hard-coded target is intended.
        self.CANDIDATES[0]['experience'][0]['projects'].append(
            fresh_project.serialize())
        return str(fresh_project.id)

    def get_random_projects(self, nr_of_projects):
        return self.data_generator.generate_projects(nr_of_projects, True)
class DataProviderService:
    """Serves candidate records generated in memory at construction time.

    All id-based operations compare against the stringified 'id' field of
    the candidate dicts held in CANDIDATES.
    """

    def __init__(self, nr_of_items):
        self.data_generator = DataGenerator()
        self.CANDIDATES = self.data_generator.generate_candidates(nr_of_items)

    def _find(self, id):
        # Locate the first candidate dict with a matching string id, or None.
        for cand in self.CANDIDATES:
            if id == str(cand["id"]):
                return cand
        return None

    def get_candidates(self):
        return self.CANDIDATES

    def get_candidate(self, id):
        # Falsy ids short-circuit to None, as in the lookup helper contract.
        return self._find(id) if id else None

    def get_random_candidates(self, nr_of_candidates):
        return self.data_generator.generate_candidates(nr_of_candidates)

    def update_name(self, id, new_name):
        # Returns the number of records changed (0 or 1).
        cand = self._find(id)
        if cand is None:
            return 0
        cand["first_name"] = new_name
        return 1

    def delete_candidate(self, id):
        cand = self._find(id)
        if cand is None:
            return False
        self.CANDIDATES.remove(cand)
        return True

    def add_candidate(self, first_name, last_name):
        cand = Candidate(first_name, last_name, [])
        self.CANDIDATES.append(cand.serialize())
        return str(cand.id)

    def add_project(self, project_name, project_description):
        new_project = Project(project_name, datetime.datetime.utcnow(),
                              datetime.datetime.utcnow(), project_description)
        # NOTE(review): projects always land on the first candidate's first
        # experience entry — verify that target is intentional.
        self.CANDIDATES[0]['experience'][0]['projects'].append(
            new_project.serialize())
        return str(new_project.id)

    def get_random_projects(self, nr_of_projects):
        return self.data_generator.generate_projects(nr_of_projects, True)
예제 #4
0
 def run(self):
     """Generate data cases, then run the training, validation and test phases."""
     generator = DataGenerator()
     cases = generator.make_cases(NUMBER_OF_CASES, SPLIT)
     train_x, train_y, val_x, val_y, test_x, test_y = cases
     # (picture preview left disabled, as in the original)
     self.training_phase(train_x, train_y)
     self.validation_phase(val_x, val_y)
     self.test_phase(test_x, test_y)
예제 #5
0
    def train(self, datasets=(0, 1)):
        """Train self.model on the given dataset pair.

        Builds a model directory under data_prefix, splits the generated
        dataset 80/20 into train/validation, fits with LR scheduling and
        best-val-loss checkpointing, and appends the Keras History to
        self.history.
        """
        # /home/etienne/data/default_experiment/models/model.{epoch:06d}.h5
        current_model_directory = data_prefix + '/data/similarities/%s/%s' % (
            self.type, self.feat)
        import os
        # FIX: exist_ok avoids the check-then-create race of the original
        # os.path.exists() guard.
        os.makedirs(current_model_directory, exist_ok=True)
        current_model_path = current_model_directory + '/model.{epoch:06d}.h5'
        lr_scheduler = LearningRateScheduler(lr_schedule)
        lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                                       cooldown=0,
                                       patience=5,
                                       min_lr=1e-7)

        print(current_model_path)
        # tensorboard = keras.callbacks.TensorBoard(log_dir=logs_path, write_graph=False)
        checkpoint = keras.callbacks.ModelCheckpoint(current_model_path,
                                                     save_best_only=True,
                                                     monitor='val_loss',
                                                     mode='min',
                                                     verbose=1)

        callbacks_list = [
            lr_reducer,
            lr_scheduler,
            # tensorboard,  (disabled)
            checkpoint
        ]
        dataset = generate_dataset((datasets[0], datasets[1]))

        print(dataset.label.value_counts())
        # hold out 20% of the rows for validation
        validation_set = dataset.sample(int(dataset.shape[0] * 0.2),
                                        replace=False)
        training_set = dataset.drop(validation_set.index)
        # contrastive loss works on a single similarity output; otherwise
        # the model predicts two classes
        if self.type == 'contrastive':
            n_classes = 1
        else:
            n_classes = 2
        train_datagen = DataGenerator(training_set,
                                      batch_size=BATCH_SIZE,
                                      augment=True,
                                      n_classes=n_classes)
        valid_datagen = DataGenerator(validation_set,
                                      batch_size=BATCH_SIZE,
                                      n_classes=n_classes)

        history = self.model.fit_generator(
            train_datagen,
            epochs=50,
            # cap an epoch at 5000 steps so huge datasets still checkpoint
            steps_per_epoch=min(5000, int(training_set.shape[0] / BATCH_SIZE)),
            callbacks=callbacks_list,
            validation_steps=min(int(validation_set.shape[0] / BATCH_SIZE),
                                 500),
            validation_data=valid_datagen,
            shuffle=True)
        self.history.append(history)
예제 #6
0
def train():
    """Run n_folds-fold cross-validated training of the LinkNet model.

    Per fold: trains with early stopping and best-model checkpointing,
    records the validation loss, and saves predictions to disk.
    Depends on module-level n_folds, batch_size, params/params2, prefix,
    prefix_model, model and save_preds.
    """
    ids = np.arange(1, 604)

    kf = KFold(n_splits=n_folds)
    fold = 0
    epochs = 1000
    cvscores = []
    for train_index, test_index in kf.split(ids):
        # FIX: fit_generator expects integer step counts; floor division
        # matches the truncation Keras would apply anyway.
        valid_steps = len(test_index) // batch_size
        steps_per_epoch = len(train_index) // (2 * batch_size)  # 2

        callbacks = [
            EarlyStopping(monitor='val_loss', patience=5),
            ModelCheckpoint(filepath=prefix_model + "best_m_{}".format(fold),
                            monitor='val_loss',
                            save_best_only=True)
        ]

        training_generator = DataGenerator(**params).generate(
            prefix, ids[train_index], ids[train_index])
        valid_generator = DataGenerator(**params2).generate(
            prefix, ids[test_index], ids[test_index])

        K.clear_session()
        linknet = model.get_model2()

        learning_rate = 1e-4
        decay_rate = learning_rate / epochs

        optimizer = optimizers.Adam(lr=learning_rate, decay=decay_rate)
        # BUG FIX: the decayed Adam optimizer was created but the model was
        # compiled with the string "adam", silently discarding it.
        linknet.compile(loss=model.loss,
                        optimizer=optimizer,
                        metrics=['accuracy', model.dice])

        linknet.fit_generator(generator=training_generator,
                              steps_per_epoch=steps_per_epoch,
                              callbacks=callbacks,
                              validation_data=valid_generator,
                              validation_steps=valid_steps,
                              epochs=epochs)

        scores = linknet.evaluate_generator(generator=valid_generator,
                                            steps=valid_steps)
        cvscores.append(scores[0])

        predictions = linknet.predict_generator(generator=training_generator,
                                                steps=100)
        save_preds(predictions, fold)
        fold = fold + 1
        del linknet

    for i, sc in enumerate(cvscores):
        print("loss fold {}:{} ".format(i, sc))
예제 #7
0
def main():
    """Train the pointer network, checkpointing and evaluating periodically.

    Restores the latest checkpoint from FLAGS.log_dir when one exists,
    otherwise initializes fresh parameters; every FLAGS.steps_per_checkpoint
    steps it logs averaged loss/step-time, evaluates on the validation data
    and saves a checkpoint. A training summary is written to
    trainingsummary.txt.
    """
    train_data = DataGenerator(FLAGS, 'train', 40000)
    valid_data = DataGenerator(FLAGS, 'valid', 5000)
    # test_data = DataGenerator(FLAGS, 'test')

    my_model = pointer_net.PointerNet(batch_size=FLAGS.batch_size,
                                      max_input_sequence_len=FLAGS.max_input_sequence_len,
                                      max_output_sequence_len=FLAGS.max_output_sequence_len,
                                      rnn_size=FLAGS.rnn_size,
                                      attention_size=FLAGS.attention_size,
                                      num_layers=FLAGS.num_layers,
                                      beam_width=FLAGS.beam_width,
                                      learning_rate=FLAGS.learning_rate,
                                      max_gradient_norm=FLAGS.max_gradient_norm,
                                      )
    # BUG FIX: the summary file was opened without a context manager, so it
    # leaked (and could lose buffered output) if training raised.
    with open('trainingsummary.txt', 'w+') as trainsummary, tf.Session() as sess:
        writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
        ckpt = tf.train.get_checkpoint_state(FLAGS.log_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Load model parameters from %s" % ckpt.model_checkpoint_path)
            my_model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            sess.run(tf.global_variables_initializer())
        print('start!!!!!!!!!!!!!!!!!')
        # running averages accumulated over one checkpoint window
        step_time = 0.0
        loss = 0.0
        current_step = 0
        train_flag_var = False
        for _ in range(FLAGS.train_epoch * (train_data.data_size // FLAGS.batch_size)):
            start_time = time.time()
            inputs, enc_input_weights, outputs, dec_input_weights = \
                train_data.get_batch(True)
            summary, step_loss, predicted_ids_with_logits, targets, debug_var = \
                my_model.step(sess, inputs, enc_input_weights, outputs, dec_input_weights, train_flag_var)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Time to print statistics and save the model
            if current_step % FLAGS.steps_per_checkpoint == 0:
                train_flag_var = True
                with sess.as_default():
                    gstep = my_model.global_step.eval()
                print("global step %d step-time %.2f loss %.2f" % (gstep, step_time, loss))
                trainsummary.write('Epoch %d \n' % (current_step / FLAGS.steps_per_checkpoint))
                trainsummary.write("global step %d step-time %.2f loss %.2f \n" % (gstep, step_time, loss))
                eval_valid(valid_data, my_model, sess, train_flag_var, trainsummary)

                writer.add_summary(summary, gstep)
                checkpoint_path = os.path.join(FLAGS.log_dir, "wdp.ckpt")
                my_model.saver.save(sess, checkpoint_path, global_step=my_model.global_step)
                # reset the window accumulators
                step_time, loss = 0.0, 0.0
                train_flag_var = False
예제 #8
0
    def __init__(self, checkpoint_path):
        """Configure training hyper-parameters and the input data pipeline."""
        self.checkpoint_path = checkpoint_path

        # optimisation schedule
        self.learning_rate = 0.01
        self.num_iter = 100000
        self.log_iter = 100
        self.save_iter = 1000
        self.val_iter = 1000
        self.batch_size = 16

        # input pipeline: fixed-size images fed by the generator
        self.image_size = (224, 224)
        self.data_generator = DataGenerator(self.image_size)
예제 #9
0
class DataProviderService:
    """Keeps a generated candidate list and offers lookup/update/delete.

    Every id-based operation compares the argument against the stringified
    'id' field of the stored candidate dicts.
    """

    def __init__(self, nr_of_items):
        self.data_generator = DataGenerator()
        self.CANDIDATES = self.data_generator.generate_candidates(nr_of_items)

    def get_candidates(self):
        return self.CANDIDATES

    def get_candidate(self, id):
        # Falsy ids yield None without scanning the list.
        if id:
            for candidate in self.CANDIDATES:
                if str(candidate['id']) == id:
                    return candidate
        return None

    def get_random_candidates(self, nr_of_candidates):
        return self.data_generator.generate_candidates(nr_of_candidates)

    def update_name(self, id, new_name):
        # Report how many records were renamed (0 or 1).
        updated = 0
        for candidate in self.CANDIDATES:
            if str(candidate['id']) == id:
                candidate['first_name'] = new_name
                updated = 1
                break
        return updated

    def delete_candidate(self, id):
        # Remove the first match; True when something was deleted.
        for candidate in self.CANDIDATES:
            if str(candidate['id']) == id:
                self.CANDIDATES.remove(candidate)
                return True
        return False

    def add_candidate(self, first_name, last_name):
        newcomer = Candidate(first_name, last_name, [])
        self.CANDIDATES.append(newcomer.serialize())
        return str(newcomer.id)
예제 #10
0
파일: broker.py 프로젝트: ava6969/samgame
    def __init__(self, account: Account, max_limit=500):
        """Attach the trading account and reset all per-session state."""
        self.account = account
        self.max_limit = max_limit
        self.dataloader = DataGenerator()
        # the universe of known symbols comes straight from the data loader
        self.all_tickers = self.dataloader.all_syms

        # per-episode cursor and selection bookkeeping
        self.index = 0
        self.tickers = []
        self.tech_indicators = []
        self.max_idx = None
        self.current_day = None
        self.generator = None

        # account balance histories at minute and day resolution
        self.minute_account_balances = []
        self.daily_account_balances = []
예제 #11
0
 def iterable_helper(self_data, other_data):
     """Deep-compare two iterables element by element.

     Requires equal lengths and exactly matching element types; recurses
     through nested dicts and iterables via dicts_helper/iterable_helper.
     """
     if len(self_data) != len(other_data):
         return False
     for left, right in zip(self_data, other_data):
         # strict type match, not just value equality
         if type(left) != type(right):
             return False
         if DataGenerator.is_dict(left):
             if not dicts_helper(left, right):
                 return False
         elif DataGenerator.is_iterable(left):
             if not iterable_helper(left, right):
                 return False
         elif left != right:
             return False
     return True
예제 #12
0
 def __init__(self, state, year):
     """Bind a state/year pair and prepare the backing data generator."""
     self.state = state
     self.year = year
     self._data_generator = DataGenerator(state, year)
     # offense/relationship code sets used to classify incidents
     self.dv_like_relationships = {27, 3, 4, 6, 12, 21, 26}
     self.violent_offenses = {1, 3, 4, 27, 32, 36, 38, 43, 51}
     self._tables = {}
예제 #13
0
    def test__add_to_dict_with__map_filter_cols(self):
        """Agency rows ingested via the lookup keep only the mapped columns."""
        generator = DataGenerator('TX', 2000)
        for filename, content in generator.extract_zip():
            if 'cde_agencies.csv' in filename.lower():
                generator._add_to_dict(filename_map.get_data(filename), content)

        self.maxDiff = None

        expected_columns = [
            'agency_id', 'ori', 'ncic_agency_name', 'state_id', 'state_abbr',
            'population', 'population_group_code', 'population_group_desc',
            'nibrs_start_date', 'county_name'
        ]
        actual_columns = list(generator._dict['agencies']['19089'][0].keys())
        self.assertCountEqual(expected_columns, actual_columns)
예제 #14
0
 def load_go_data(self, data_type='train', num_samples=1000, use_generator=False):
     """Load *num_samples* Go games of the given split ('train' or 'test').

     Ensures the sgf archives backing the drawn games have been processed
     into on-disk numpy chunks, then returns either a lazy DataGenerator
     (use_generator=True) or the fully materialized features and labels.
     """
     splitter = Splitter(data_dir=self.data_dir)
     data = splitter.draw_data(data_type, num_samples)

     # Group sgf file indices by the zip archive that contains them.
     zip_names = set()
     indices_by_zip_name = {}
     for filename, index in data:
         zip_names.add(filename)
         # FIX: setdefault replaces the manual membership check + list init.
         indices_by_zip_name.setdefault(filename, []).append(index)

     for zip_name in zip_names:
         base_name = zip_name.replace('.tar.gz', '')
         data_file_name = base_name + data_type  # train or test
         if not os.path.isfile(self.data_dir + '/' + data_file_name):
             # extract the sgf files, encode them to numpy arrays
             # (features and labels) and save them as chunks on disk
             self.process_zip(zip_name, data_file_name, indices_by_zip_name[zip_name])

     if use_generator:
         generator = DataGenerator(self.data_dir, data)
         return generator
     else:
         features_and_labels = self.group_games(data_type, data)
         return features_and_labels
예제 #15
0
def predict_eval():
    """Evaluate the four saved fold models on a fixed validation split.

    Loads ../models/best_m_{fold} for folds 0..3, scores each against the
    same generator-backed validation data, and prints all score vectors.
    """
    all_scores = []
    for fold in range(4):
        K.clear_session()
        weights_path = "../models/best_m_{}".format(fold)
        ids = np.arange(1, 604)
        test_index = np.arange(500)
        valid_generator = DataGenerator(**params2).generate(
            prefix, ids[test_index], ids[test_index])

        net = model.get_model2()
        net.load_weights(weights_path)
        net.compile(loss=model.loss,
                    optimizer="adam",
                    metrics=['accuracy', model.dice])

        scores = net.evaluate_generator(generator=valid_generator, steps=50)
        print(scores)
        all_scores.append(scores)
        del net

    for entry in all_scores:
        print(entry)
0
    def train(self):
        """Train the landmark model from configured sources.

        Splits the shuffled sources into training/validation sets, resumes
        from the latest checkpoint when one exists (inferring the initial
        epoch from the checkpoint filename), and fits with per-epoch
        checkpointing.
        """
        # load and persist configuration
        config = self._config
        config.save()

        # load training data
        loader = SourceLoader("train", config.get_parameter("landmarks"), config.is_debug())
        sources = loader.get_sources()
        input_shape = config.input_shape()

        # shuffle indices and split them into two disjoint parts
        # NOTE(review): despite its name, 'validation_split' sizes the FIRST
        # slice, which is used for training — confirm the intended semantics.
        len_data = len(sources)
        numbers = list(range(len_data))
        np.random.shuffle(numbers)
        split_data = math.floor(len_data * config.get_parameter('validation_split'))
        train_ids = numbers[0:split_data]
        # BUG FIX: the original sliced from split_data + 1 and silently
        # dropped one sample; validation now starts exactly where training ends.
        val_ids = numbers[split_data:]
        validation_sources = sources[val_ids]
        training_sources = sources[train_ids]

        # build data generators
        training_generator = DataGenerator(training_sources, **config.get_bundle("generator"))
        validation_generator = DataGenerator(validation_sources, **config.get_bundle("generator"))

        model = self._generate_model(input_shape)

        # callback to persist the model state after each epoch
        cp_callback = tensorflow.keras.callbacks.ModelCheckpoint(**config.get_bundle("checkpoint"))

        # resume from existing weights if present, deriving the starting
        # epoch from the numeric id embedded in the checkpoint filename
        initial_epoch = 0
        latest = tensorflow.train.latest_checkpoint(config.checkpoint_path())
        if latest:
            print("found existing weights, loading...")
            model.load_weights(latest)
            found_num = re.search(r'\d+', os.path.basename(latest))
            if found_num:
                checkpoint_id = int(found_num.group(0))
                initial_epoch = checkpoint_id

        # fit the model
        model.fit(training_generator,
                  validation_data=validation_generator,
                  epochs=config.get_parameter("epochs"),
                  initial_epoch=initial_epoch,
                  callbacks=[cp_callback])
예제 #17
0
def trainIters(encoder,
               decoder,
               n_iters,
               print_every=1000,
               plot_every=2,
               learning_rate=5e-4):
    """Train the encoder/decoder pair on generated motion→claim pairs.

    Generates one batch of num_train_data pairs up front, trains one step
    per pair, periodically prints averaged loss, and plots the loss curve
    at the end. NOTE(review): the loop runs over the module-level
    num_train_data, so the n_iters parameter is currently unused — confirm.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    # NOTE(review): .next() is the Python 2 iterator protocol; under
    # Python 3 this call must become next(...) — confirm the interpreter.
    motions, claims = DataGenerator(lexicon_count=lexicon_count,
                                    motion_length=motion_length,
                                    claim_length=claim_length,
                                    num_data=num_train_data,
                                    data_dir=train_data_dir,
                                    batch_size=num_train_data,
                                    shuffle=True).generate().next()
    criterion = nn.NLLLoss()

    # FIX: loop variable renamed from `iter`, which shadowed the builtin.
    for step in range(1, num_train_data + 1):
        motion = np.asarray(motions[step - 1])
        claim = np.asarray(claims[step - 1])
        motion = Variable(torch.LongTensor(motion.tolist()).unsqueeze(1))
        claim = Variable(torch.LongTensor(claim.tolist()).unsqueeze(1))

        input_variable = motion.cuda() if use_cuda else motion
        target_variable = claim.cuda() if use_cuda else claim

        loss = train(input_variable, target_variable, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (timeSince(start, step / num_train_data), step,
                   step / num_train_data * 100, print_loss_avg))

        if step % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)
예제 #18
0
def train(model, train_img_ids, val_img_ids, args):
    """Fit *model* on patch batches drawn from the training image ids.

    A validation generator is attached only when validation ids are given;
    a checkpoint is written to args.model_path after every epoch.
    """
    train_generator = DataGenerator(train_img_ids, args.train_images_folder,
                                    args.batch_size, args.patch_size)
    checkpointer = keras.callbacks.ModelCheckpoint(filepath=args.model_path,
                                                   verbose=1,
                                                   save_best_only=False)
    val_generator = (DataGenerator(val_img_ids, args.val_images_folder,
                                   args.batch_size, args.patch_size)
                     if len(val_img_ids) > 0 else None)
    model.fit_generator(generator=train_generator,
                        validation_data=val_generator,
                        epochs=args.epochs,
                        verbose=1,
                        callbacks=[checkpointer],
                        use_multiprocessing=False)
예제 #19
0
    def test__add_to_dict(self):
        """_add_to_dict ingests every nibrs_victim row under the mapped keys."""
        generator = DataGenerator('TX', 2013)
        for filename, content in generator.extract_zip():
            if 'nibrs_victim.csv' in filename.lower():
                generator._add_to_dict(filename_map.get_data(filename), content)

        self.assertEqual(189139, len(generator._dict['nibrs_victim']))

        expected_keys = [
            'incident_id', 'age_num', 'sex_code', 'race_id', 'ethnicity_id',
            'victim_id', 'victim_type_id', 'victim_seq_num'
        ]
        actual_keys = list(
            generator._dict['nibrs_victim']['68950600'][0].keys())
        self.assertCountEqual(expected_keys, actual_keys)
예제 #20
0
    def test__get_cols(self):
        """_filter_cols keeps only the columns named in the lookup."""
        # NOTE(review): test name says _get_cols but exercises _filter_cols.
        lookup = filename_map.get_data('nibrs_location_type.csv')
        input_row = {
            'merp': 'herro',
            'location_id': 47,
            'mop': 788888,
            'location_name': 'bad one',
        }

        data_generator = DataGenerator('KY', 2016)

        filtered = data_generator._filter_cols(lookup, input_row)

        self.assertDictEqual(
            {
                'location_id': 47,
                'location_name': 'bad one',
            },
            filtered)
예제 #21
0
    def test__map_filter_cols(self):
        """_map_filter_cols renames and filters columns per the lookup."""
        lookup = filename_map.get_data('cde_agencies.csv')
        input_row = {
            'merp': 'herro',
            'ori': 47,
            'mop': 788888,
            'primary_county': 'Mexicaliand',
        }

        data_generator = DataGenerator('TX', 2015)

        mapped = data_generator._map_filter_cols(lookup, input_row)

        self.assertDictEqual(
            {
                'ori': 47,
                'county_name': 'Mexicaliand',
            },
            mapped)
예제 #22
0
def trainer(model_params):
    """Train and persist the discriminator on the configured datasets."""
    datasets = load_dataset('dataset', model_params)
    # each split is a (part_a, part_b) pair; concatenate the halves
    train_set = datasets[0][0] + datasets[0][1]
    valid_set = datasets[1][0] + datasets[1][1]
    test_set = datasets[2][0] + datasets[2][1]
    # map each configured dataset name to an integer class label
    labels = {name: index for index, name in enumerate(model_params.data_set)}
    train_generator = DataGenerator(train_set, labels,
                                    batch_size=model_params.batch_size,
                                    shuffle=True)
    valid_generator = DataGenerator(valid_set, labels,
                                    batch_size=model_params.batch_size,
                                    shuffle=True)
    model = discriminator.Discriminator(model_params)
    model.train(train_generator, valid_generator)
    model.save()
    print('Done!')
예제 #23
0
 def helper(data, keys):
     """Collect *data*'s (possibly nested) dict keys into *keys*; return it.

     A nested dict is recorded as [key, [child keys...]]; plain keys are
     appended as-is.
     """
     for key in data:
         value = data[key]
         if DataGenerator.is_dict(value):
             # nested dict: record the key with a fresh list for its children
             keys.append([key, []])
             helper(value,
                    keys[-1][1] if type(keys[-1]) is list else keys)
         else:
             keys.append(key)
     return keys
예제 #24
0
    def run_trial(self,
                  trial,
                  data=None,
                  labels=None,
                  users=None,
                  indices=None,
                  batch_size=32):
        """Run one hyper-parameter trial with user-grouped inner cross-validation.

        Reports the mean validation loss across folds to the oracle and
        saves the model trained in the last fold under this trial's id.
        """
        self.ntrial += 1
        hp = trial.hyperparameters
        print('Trial {:d}'.format(self.ntrial))
        print(hp.values)
        # Hyperband-style resume: reload the model from the earlier bracket
        # trial when the oracle passed one along.
        if "tuner/trial_id" in hp:
            past_trial = self.oracle.get_trial(hp['tuner/trial_id'])
            model = self.load_model(past_trial)
        else:
            model = self.hypermodel.build(hp)
        initial_epoch = hp['tuner/initial_epoch']
        epochs = hp['tuner/epochs']

        # Cross-validation based on users
        fold = 0
        val_losses = []
        cv = GroupKFold(n_splits=self.cv)
        trial_users = users[indices]
        # Dummy X/y arrays: GroupKFold only needs their length — the split
        # is driven entirely by the group labels (trial_users).
        X = np.zeros((len(trial_users), 10))
        y = np.zeros(len(trial_users))  # dummy for splitting
        for train_indices, val_indices in cv.split(X, y, trial_users):
            fold += 1
            print('Inner CV fold {:d}'.format(fold))
            train_gen = DataGenerator(indices[train_indices], data, labels, self.states, partition='train',\
                                      batch_size=batch_size, seqlen=self.seqlen, n_channels=self.num_channels, feat_channels=self.feat_channels,\
                                      n_classes=self.num_classes, shuffle=True, balance=True, mean=self.mean, std=self.std)
            val_gen = DataGenerator(indices[val_indices], data, labels, self.states, partition='test',\
                                    batch_size=batch_size, seqlen=self.seqlen, n_channels=self.num_channels, feat_channels=self.feat_channels,\
                                    n_classes=self.num_classes, mean=self.mean, std=self.std)
            # NOTE(review): this per-fold rebuild overwrites the model
            # loaded/built above, so the "tuner/trial_id" resume path appears
            # dead — confirm whether fresh builds per fold are intentional.
            model = self.hypermodel.build(trial.hyperparameters)
            model.fit(train_gen, epochs=epochs, validation_data=val_gen,\
                      verbose=1, shuffle=False, initial_epoch=initial_epoch,
                      workers=2, max_queue_size=20, use_multiprocessing=False )
            val_losses.append(model.evaluate(val_gen))
        self.oracle.update_trial(trial.trial_id,
                                 {'val_loss': np.mean(val_losses)})
        # Only the last fold's model is persisted for this trial.
        self.save_model(trial.trial_id, model)
    def __init__(self):
        """Capture module-level training settings and build the autoencoder.

        All right-hand names (height, width, X_train, optimizer, ...) are
        assumed to be module-level globals — TODO confirm.
        """
        # image geometry
        self.height = height
        self.width = width
        self.channels = channels

        # training configuration
        self.batch_size = batch_size
        self.epochs = epochs
        self.line = line
        self.n_show_image = n_show_image
        self.number = number
        self.vgg = vgg
        self.optimizer = optimizer

        # data pipelines for fitting and prediction
        self.DG = DataGenerator(X_train, Y_train,
                                batch_size=batch_size,
                                dim=(128, 128))
        self.DGP = DataGenerator_predict(X_predict,
                                         batch_size=batch_size,
                                         dim=(128, 128))

        self.AE = self.build_AE()
        self.AE.compile(loss='mse', optimizer=self.optimizer)
예제 #26
0
 def dicts_helper(self_data, other_data):
     """Deep-compare two dicts pairwise in iteration order.

     NOTE: relies on both dicts yielding their items in matching order;
     values must also match in exact type, recursing through nested dicts
     and iterables.
     """
     if len(self_data) != len(other_data):
         return False
     for (self_k, self_v), (other_k, other_v) in zip(self_data.items(),
                                                     other_data.items()):
         if self_k != other_k:
             return False
         if type(self_v) != type(other_v):
             return False
         if DataGenerator.is_dict(self_v):
             if not dicts_helper(self_v, other_v):
                 return False
         elif DataGenerator.is_iterable(self_v):
             if not iterable_helper(self_v, other_v):
                 return False
         elif self_v != other_v:
             return False
     return True
예제 #27
0
파일: trainer.py 프로젝트: zLi90/deepriver
    def __init__(self, checkpoint_path='./checkpoints/'):
        """Set up training hyper-parameters and the image data generator."""
        self.checkpoint_path = checkpoint_path

        # optimisation schedule
        self.learning_rate = 0.0001
        self.num_iter = 1000000
        self.log_iter = 500
        self.save_iter = 5000
        # batch re-normalisation makes training with batch_size = 1 viable
        self.batch_size = 1

        # input pipeline
        self.image_size = (1024, 1024)
        self.data_generator = DataGenerator(self.image_size)
        self.num_labels = 3
예제 #28
0
def load_generators(data_dir):
    """Build the train/validation DataGenerator pair from data_dir.

    Expects data_dir to contain 'train_labels.csv' (columns 'id', 'label')
    and a 'train/' directory holding the images.

    Fix: paths were built with raw string concatenation, which silently
    produced wrong paths when data_dir lacked a trailing slash; os.path.join
    handles both cases.

    Returns:
        (train_gen, val_gen) tuple of DataGenerator instances.
    """
    import os  # local import keeps this fix self-contained

    # Generator parameters.
    params = {
        'dim': (96, 96),
        'batch_size': 100,
        'n_classes': 2,
        'n_channels': 3,
        'shuffle': True
    }

    # 90/10 train/validation split with a fixed seed for reproducibility.
    data = pd.read_csv(os.path.join(data_dir, 'train_labels.csv'))
    train, val = train_test_split(data, test_size=0.1, random_state=42)
    partition = {"train": list(train['id']), "validation": list(val['id'])}
    labels = dict(zip(data['id'], data['label']))

    # Empty final component keeps the trailing separator the original code
    # produced, so DataGenerator can still concatenate file names directly.
    train_dir = os.path.join(data_dir, "train", "")

    # Generators
    train_gen = DataGenerator(partition['train'], labels, train_dir, **params)
    val_gen = DataGenerator(partition['validation'], labels, train_dir,
                            **params)

    return train_gen, val_gen
    def __init__(self):
        """Wire up the discriminator, generator and combined side->front model.

        All hyper-parameters (height, width, channels, batch_size, epochs,
        line, n_show_image, vgg, optimizerD, optimizerC, number) and data
        arrays (X_train, Y_train, X_predict) are read from the enclosing
        scope — NOTE(review): confirm these names exist wherever this class
        is instantiated.
        """
        # Cache configuration on the instance.
        self.height = height
        self.width = width
        self.channels = channels
        self.batch_size = batch_size
        self.epochs = epochs
        self.line = line
        self.n_show_image = n_show_image
        self.vgg = vgg
        self.optimizerD = optimizerD
        self.optimizerC = optimizerC
        self.number = number

        # Training and prediction data feeds.
        self.DG = DataGenerator(X_train, Y_train, batch_size=batch_size)
        self.DGP = DataGenerator_predict(X_predict, batch_size=batch_size)

        # Discriminator output patch: half the input resolution, 3 channels.
        half_size = int(self.height / 2)
        self.disc_patch = (half_size, half_size, 3)

        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=self.optimizerD)

        self.generator = self.build_generator()

        # Freeze the discriminator while the combined model trains.
        self.discriminator.trainable = False

        side_input = Input(shape=(self.height, self.width, self.channels))
        front_input = Input(shape=(self.height, self.width, self.channels))
        generated = self.generator(side_input)

        validity = self.discriminator([front_input, generated])

        self.combined = Model([side_input, front_input],
                              [generated, validity])
        self.combined.compile(loss=['mae', "mse"],
                              loss_weights=[100, 1],
                              optimizer=self.optimizerC)
예제 #30
0
    def read_data(self, data_dir):
        """Collect image paths and integer class labels under data_dir.

        os.walk yields data_dir itself first; that entry is skipped so only
        the per-class sub-directories contribute samples. Labels are 0-based
        class indices assigned in walk order. The results are fed into the
        internal DataGenerator.

        Fix: the original used list comprehensions purely for their side
        effects (building throwaway lists of None) and hand-maintained the
        class counter; enumerate/extend express the same loop directly.
        """
        patterns = []
        labels = []

        for i, (root, dirs, files) in enumerate(os.walk(data_dir)):
            if i == 0:
                # First entry is data_dir itself, not a class directory.
                continue
            patterns.extend(root + '/' + name for name in files)
            labels.extend([i - 1] * len(files))

        self.__generator = DataGenerator(patterns, labels, self.__scale_size,
                                         self.__shuffle, self.__input_channels,
                                         self.__n_classes)
def makePredictions(model):
    """Predict over the test split and save predictions plus ground truth.

    Stacks the model's predictions with the label matrix read from the TEST
    csv and writes the result to 'Elmo_Predictions.npy'.

    Fix: the generator is built with BATCH_SIZE but the step count was
    hard-coded to 64, silently producing the wrong number of steps whenever
    BATCH_SIZE != 64; integer division also keeps the step count an int.
    """
    dataGentest = DataGenerator("test", 48832, BATCH_SIZE, False)
    predictions = model.predict_generator(dataGentest,
                                          dataGentest.n // BATCH_SIZE,
                                          verbose=1,
                                          workers=8)

    # Column names of the 14 target findings in the test csv.
    LABELS = [
        'No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
        'Airspace Opacity', 'Lung Lesion', 'Edema', 'Consolidation',
        'Pneumonia', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion',
        'Pleural Other', 'Fracture', 'Support Devices'
    ]
    testdf = pd.read_csv(TEST, nrows=48832)
    array = np.array([predictions, testdf[LABELS].values])
    np.save("Elmo_Predictions", array)
예제 #32
0
class TestPipelineFramework(unittest.TestCase):
    """Validate the graph shape of every pipeline DataGenerator can build.

    Fix: replaced the deprecated assertEquals alias with assertEqual
    (assertEquals exists in Python 2.7/3.x only as a deprecated alias).
    """

    def setUp(self):
        # Fresh generator per test keeps the cases independent.
        self.dg = DataGenerator()

    def test_create_empty_framework(self):
        with self.assertRaises(Exception):
            self.dg.get_empty_pipeline()

    def test_single_node_framework(self):
        p = self.dg.get_single_node_pipeline()
        self.assertEqual(len(p), 1)
        self.assertTrue(nx.is_tree(p.dag))

    def test_linear_framework(self):
        p = self.dg.get_linear_pipeline()
        self.assertEqual(len(p), 3)
        self.assertTrue(nx.is_tree(p.dag))

    def test_tree_framework(self):
        p = self.dg.get_tree_pipeline()
        self.assertEqual(len(p), 7)
        self.assertTrue(nx.is_tree(p.dag))

    def test_dag_framework(self):
        p = self.dg.get_dag_pipeline()
        self.assertEqual(len(p), 7)
        self.assertTrue(nx.is_directed_acyclic_graph(p.dag))

    def test_cyclic_framework(self):
        with self.assertRaises(Exception):
            self.dg.get_cyclic_pipeline()

    def test_disconnected_framework(self):
        p = self.dg.get_disconnected_pipeline()
        self.assertEqual(len(p), 5)
        self.assertTrue(nx.is_directed_acyclic_graph(p.dag))

    def test_unbalanced_framework(self):
        p = self.dg.get_unbalanced_pipeline()
        self.assertEqual(len(p), 7)
        self.assertTrue(nx.is_tree(p.dag))

    def test_ranktree_framework(self):
        p = self.dg.get_ranktree_pipeline()
        self.assertEqual(len(p), 14)
        self.assertTrue(nx.is_tree(p.dag))

    def test_loose_framework(self):
        p = self.dg.get_loose_pipeline()
        self.assertEqual(len(p), 6)
        self.assertTrue(nx.is_directed_acyclic_graph(p.dag))

    def test_self_ref_framework(self):
        with self.assertRaises(Exception):
            self.dg.get_self_ref_pipeline()

    def test_duplicate_node_framework(self):
        with self.assertRaises(Exception):
            self.dg.get_duplicate_node_pipeline()

    def test_unknown_uid_framework(self):
        # NOTE(review): assertRaisesRegexp is the Python 2 spelling; it was
        # renamed to assertRaisesRegex in Python 3.2 — switch once the
        # project targets Python 3 only.
        with self.assertRaisesRegexp(KeyError, "Unknown UID C set as requirement for B"):
            self.dg.get_unknown_uid_framework()
예제 #33
0
    def __init__(self,
                 N,
                 T,
                 random_network,
                 sigma_x=0.1,
                 sigma_y=0.001,
                 sigma_baseline=0.0001,
                 renormalize_sigma=False,
                 time_weights=None,
                 time_weights_parameters=None,
                 cued_feature_time=0,
                 enforce_min_distance=0.17,
                 stimuli_generation='random',
                 enforce_first_stimulus=True,
                 stimuli_to_use=None,
                 specific_stimuli_random_centers=False,
                 specific_stimuli_asymmetric=False,
                 enforce_distance_cued_feature_only=False,
                 debug=False
                 ):
        """Build a stimulus set and the corresponding noisy dataset.

        N, T, random_network are forwarded to DataGenerator.__init__ along
        with the time-weighting configuration. sigma_x / sigma_y /
        sigma_baseline are the noise levels passed to init_all_sigma
        (renormalized when renormalize_sigma is set). Stimuli come from one
        of three sources: an explicit stimuli_to_use list, the special
        'specific_stimuli' pattern, or random generation driven by
        stimuli_generation; build_dataset is then called with
        cued_feature_time.

        enforce_min_distance is only stored on the instance here —
        presumably consumed by the stimulus generators; TODO confirm.

        Raises:
            ValueError: when both stimuli_to_use and stimuli_generation are
                None, so no stimuli can be produced.
        """
        # Fill in default temporal-weighting parameters when none are given.
        if time_weights_parameters is None:
            time_weights_parameters = dict(weighting_alpha=0.3,
                                           weighting_beta=1.0,
                                           specific_weighting=0.3,
                                           weight_prior='uniform'
                                           )
        DataGenerator.__init__(self, N, T, random_network,
                               sigma_y=sigma_y,
                               time_weights=time_weights,
                               time_weights_parameters=time_weights_parameters
                               )

        # This is the noise on specific memories. Belongs here.
        self.init_all_sigma(sigma_x, sigma_y, sigma_baseline,
                            renormalize=renormalize_sigma)

        self.enforce_min_distance = enforce_min_distance

        # Build the correct stimuli
        if stimuli_to_use is not None:
            # Use the provided stimuli
            self.set_stimuli(stimuli_to_use)
        else:
            if stimuli_generation == 'specific_stimuli':
                # Use our specifically built function, to get the special
                # stimuli combination allowing to verify some biases
                self.generate_specific_stimuli(asymmetric=specific_stimuli_asymmetric, centre=np.array([0., 0.]), specific_stimuli_random_centers=specific_stimuli_random_centers)
            elif stimuli_generation is not None:
                # Generate it randomly
                self.generate_stimuli(stimuli_generation=stimuli_generation, enforce_first_stimulus=enforce_first_stimulus, cued_feature_R=1, enforce_distance_cued_feature_only=enforce_distance_cued_feature_only)
            else:
                raise ValueError("No data generation possible.")

        # Python 2 print statements: summary of the configuration.
        if debug:
            print "== DataGeneratorRfn =="
            print "Size: %d, %d items/times." % (N, T)
            print "sigma_x %.3g, sigma_y %.3g sigma_baseline %.3g renormalized: %d" % (self.sigma_x, self.sigma_y, self.sigma_baseline, renormalize_sigma)

        # Build the dataset
        self.build_dataset(cued_feature_time=cued_feature_time)
 def __init__(self, nr_of_items):
     """Create a provider seeded with nr_of_items generated candidates."""
     generator = DataGenerator()
     self.data_generator = generator
     self.CANDIDATES = generator.generate_candidates(nr_of_items)
예제 #35
0
파일: test_rank.py 프로젝트: enj/hivemind
class TestRank(unittest.TestCase):
    """Exercise ranker.rank_by_total_successors / rank_by_successors on the
    standard DataGenerator pipelines.

    Fix: test_rank_by_successors_single_node called rank_by_total_successors
    (copy-paste bug), leaving rank_by_successors untested for that shape.

    NOTE(review): `p.dag.node` is the networkx 1.x attribute (renamed to
    `nodes` in networkx 2.x) — confirm the pinned networkx version.
    """

    def setUp(self):
        self.dag = DataGenerator()

    def test_rank_by_total_successors_linear(self):
        p = self.dag.get_linear_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 2, 'B': 1, 'C': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_tree(self):
        p = self.dag.get_tree_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 6, 'B': 2, 'C': 2, 'D': 0, 'E': 0, 'F': 0, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_dag(self):
        p = self.dag.get_dag_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 6, 'B': 2, 'C': 5, 'D': 1, 'E': 0, 'F': 1, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_dag_disconected(self):
        p = self.dag.get_disconnected_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 1, 'B': 0, 'C': 2, 'D': 0, 'E': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_single_node(self):
        p = self.dag.get_single_node_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_unbalanced_pipeline(self):
        p = self.dag.get_unbalanced_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 6, 'B': 4, 'C': 0, 'D': 3, 'E': 0, 'F': 0, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_ranktree_pipeline(self):
        p = self.dag.get_ranktree_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 13, 'B': 6, 'C': 5, 'D': 2, 'E': 2, 'F': 0, 'G': 0,
                    'H': 0, 'I': 0, 'J': 0, 'K': 0, 'L': 0, 'M': 0, 'N': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_total_successors_loose_pipeline(self):
        p = self.dag.get_loose_pipeline()
        ranker.rank_by_total_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_linear(self):
        p = self.dag.get_linear_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 1, 'B': 1, 'C': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_tree(self):
        p = self.dag.get_tree_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 2, 'B': 2, 'C': 2, 'D': 0, 'E': 0, 'F': 0, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_dag(self):
        p = self.dag.get_dag_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 1, 'B': 1, 'C': 3, 'D': 1, 'E': 0, 'F': 1, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_dag_disconected(self):
        p = self.dag.get_disconnected_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 1, 'B': 0, 'C': 2, 'D': 0, 'E': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_single_node(self):
        p = self.dag.get_single_node_pipeline()
        # Was rank_by_total_successors — a copy-paste slip; this test is
        # about rank_by_successors.
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_unbalanced_pipeline(self):
        p = self.dag.get_unbalanced_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 2, 'B': 1, 'C': 0, 'D': 3, 'E': 0, 'F': 0, 'G': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_ranktree_pipeline(self):
        p = self.dag.get_ranktree_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 2, 'B': 2, 'C': 5, 'D': 2, 'E': 2, 'F': 0, 'G': 0,
                    'H': 0, 'I': 0, 'J': 0, 'K': 0, 'L': 0, 'M': 0, 'N': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_successors_loose_pipeline(self):
        p = self.dag.get_loose_pipeline()
        ranker.rank_by_successors(p)
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}
        self.assertEqual(ranks, expected)

    def test_rank_by_fifo_tree(self):
        # NOTE(review): the fifo tests invoke no ranker — ranks are expected
        # to remain None; confirm that is the intended FIFO contract.
        p = self.dag.get_tree_pipeline()
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': None, 'B': None, 'C': None, 'D': None, 'E': None, 'F': None, 'G': None}
        self.assertEqual(ranks, expected)

    def test_rank_by_fifo_unbalanced_pipeline(self):
        p = self.dag.get_unbalanced_pipeline()
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': None, 'B': None, 'C': None, 'D': None, 'E': None, 'F': None, 'G': None}
        self.assertEqual(ranks, expected)

    def test_rank_by_fifo_ranktree_pipeline(self):
        p = self.dag.get_ranktree_pipeline()
        ranks = {task._uid: task._rank for task in p.dag.node}
        expected = {'A': None, 'B': None, 'C': None, 'D': None, 'E': None, 'F': None, 'G': None,
                    'H': None, 'I': None, 'J': None, 'K': None, 'L': None, 'M': None, 'N': None}
        self.assertEqual(ranks, expected)
예제 #36
0
파일: test_rank.py 프로젝트: enj/hivemind
 def setUp(self):
     # Runs before every test: a fresh DataGenerator keeps cases independent.
     # NOTE(review): the attribute is named `dag` but holds the generator,
     # not a graph — consider renaming if callers allow.
     self.dag = DataGenerator()
예제 #37
0
class TestConcretePipeline(unittest.TestCase):
    """Tests for $$var$$ substitution and task bookkeeping in
    ConcretePipeline.

    Fix: replaced the deprecated assertEquals alias with assertEqual
    throughout (assertEquals survives only as a deprecated alias).
    """

    def setUp(self):
        self.dg = DataGenerator()

    def test_replace_none(self):
        data = self.dg.get_args()
        p = PipelineFramework([(Task("A", False, False, "exe", None, "path", "none", "are", "replaced"), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "exe none are replaced")
        self.assertEqual(task.wd, "path")
        self.assertFalse(task.skip)

    def test_replace_simple(self):
        data = self.dg.get_args()
        p = PipelineFramework([(Task("B", "$$skip1$$", "f", "exe", None, "path", "-blah", "$$a1$$"), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "exe -blah val_for_a1")
        self.assertEqual(task.wd, "path")
        self.assertTrue(task.skip)

    def test_replace_multiple(self):
        data = self.dg.get_args()
        p = PipelineFramework([(Task("C", "$$skip2$$", False, "$$a2$$", None, "$$a1$$", "$$a3$$", "$$a4$$"), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "secondParameter a_3rd_one 4")
        self.assertEqual(task.wd, "val_for_a1")
        self.assertTrue(task.skip)

    def test_replace_repeated(self):
        data = self.dg.get_args()
        p = PipelineFramework([(Task("D", "$$skip3$$", False, "$$a1$$", None, "$$a1$$", "$$a1$$", "$$a1$$"), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "val_for_a1 val_for_a1 val_for_a1")
        self.assertEqual(task.wd, "val_for_a1")
        self.assertTrue(task.skip)

    def test_replace_partial(self):
        data = self.dg.get_args()
        p = PipelineFramework([(Task("E", "$$skip4$$", False, "$$a1$$", None, "$$a4$$a4$$"), [])])
        with self.assertRaises(Exception):
            ConcretePipeline(0, p, data, "blah")

    def test_replace_back_to_back(self):
        data = self.dg.get_args()
        p = PipelineFramework([(Task("F", "$$skip5$$", False, "$$a4$$$$a4$$", None, "$$a2$$"), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "44")
        self.assertEqual(task.wd, "secondParameter")
        self.assertFalse(task.skip)

    def test_replace_substring(self):
        data = self.dg.get_args()
        p = PipelineFramework([(Task("G", "$$skip6$$", False, "exe", None, "/path/$$a2$$/more", "-$$a3$$", "\"$$a4$$\""), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "exe -a_3rd_one \"4\"")
        self.assertEqual(task.wd, "/path/secondParameter/more")
        self.assertFalse(task.skip)

    def test_replace_unicode(self):
        data = self.dg.get_args()
        p = PipelineFramework([(Task("G", False, False, u"exe", None, u"/path/$$a2$$/more", u"-$$a3$$", u"\"$$a4$$\""), [])])
        cp = ConcretePipeline(0, p, data, "blah")
        task = cp.dag.nodes()[0]
        self.assertEqual(str(task), "exe -a_3rd_one \"4\"")
        self.assertEqual(task.wd, "/path/secondParameter/more")
        self.assertFalse(task.skip)

    def test_replace_invalid_var(self):
        data = self.dg.get_args()
        p = PipelineFramework([(Task("G", "$$skip6$$", False, "exe", None, "/path/$$a2$$/more", "-$$a3$$", "$$a5$$"), [])])
        with self.assertRaises(Exception):
            ConcretePipeline(0, p, data, "blah")

    def test_replace_invalid_type(self):
        data = self.dg.get_args()
        p = PipelineFramework([(Task("G", "$$skip6$$", False, "exe", None, None, "-$$a3$$", "$$a4$$"), [])])
        with self.assertRaises(Exception):
            ConcretePipeline(0, p, data, "blah")

    def test_done(self):
        data = self.dg.get_args()
        p = self.dg.get_dag_pipeline()
        cp = ConcretePipeline(0, p, data, "blah")
        ready = list(cp.get_ready_tasks())
        self.assertEqual(len(ready), 2)
        # Get the "A" task
        task = [t for t in ready if t._uid == "A"][0]

        self.assertFalse(list(cp.get_ready_successors(task)))
        self.assertFalse(cp.is_done(task))
        cp.set_done(task)
        self.assertTrue(cp.is_done(task))
        self.assertEqual(list(cp.get_ready_successors(task))[0]._uid, "C")
        self.assertEqual(cp.get_completed_tasks(), 1)

    def test_mc(self):
        data = self.dg.get_args()

        def get_mc(p):
            return ConcretePipeline(0, p, data, "blah").mc

        self.assertEqual(get_mc(PipelineFramework([])), 0)
        self.assertEqual(get_mc(self.dg.get_single_node_pipeline()), 1)
        self.assertEqual(get_mc(self.dg.get_linear_pipeline()), 1)
        self.assertEqual(get_mc(self.dg.get_tree_pipeline()), 4)
        self.assertEqual(get_mc(self.dg.get_dag_pipeline()), 3)
        self.assertEqual(get_mc(self.dg.get_disconnected_pipeline()), 3)
        self.assertEqual(get_mc(self.dg.get_unbalanced_pipeline()), 4)
        self.assertEqual(get_mc(self.dg.get_ranktree_pipeline()), 9)
        self.assertEqual(get_mc(self.dg.get_loose_pipeline()), 6)