Example No. 1
    def train(self,
              embedding_size=128,
              window_size=5,
              workers=8,
              nb_epochs=50,
              **kwargs):

        kwargs["sentences"] = self.walks
        kwargs["min_count"] = kwargs.get("min_count", 0)
        kwargs["size"] = embedding_size
        kwargs["sg"] = 1  # skip gram
        kwargs["hs"] = 1  # deepwalk use Hierarchical Softmax
        kwargs["workers"] = workers
        kwargs["window"] = window_size
        kwargs["iter"] = nb_epochs
        kwargs["compute_loss"] = True
        kwargs["min_alpha"] = 1e-2
        call_back = callback(all_loss=[])
        kwargs['callbacks'] = [call_back]

        print("Learning embedding vectors...")
        self.word_vectors = Word2Vec(**kwargs).wv
        print("Learning embedding vectors done!")

        # Map each node id to its embedding vector.
        # (index2word is the gensim < 4.0 attribute; 4.x uses index_to_key.)
        self.word_vectors = dict(
            zip(self.word_vectors.index2word,
                self.word_vectors.vectors.tolist()))
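
The excerpt assumes that Word2Vec is imported from gensim.models and that a callback class is defined elsewhere in the repository. A minimal sketch of what that class plausibly looks like, built on gensim's CallbackAny2Vec hook (the constructor signature is taken from the call above; the loss bookkeeping itself is an assumption):

from gensim.models.callbacks import CallbackAny2Vec

class callback(CallbackAny2Vec):
    """Collects the training loss reported after every epoch (sketch)."""

    def __init__(self, all_loss):
        self.all_loss = all_loss  # shared list of per-epoch loss values

    def on_epoch_end(self, model):
        # Valid because compute_loss=True is passed to Word2Vec above;
        # get_latest_training_loss() returns the loss accumulated so far.
        self.all_loss.append(model.get_latest_training_loss())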
Example No. 2
                                              verbose='OFF')
        if args.generator != 'ON':
            train_sample = apply_t_scaler(train_sample, t_scaler, verbose='ON')

    sample_composition(train_sample)
    compo_matrix(valid_labels, train_labels=train_labels)
    print()
    train_weights, bins = get_sample_weights(train_sample,
                                             train_labels,
                                             args.weight_type,
                                             args.bkg_ratio,
                                             hist='pt')
    sample_histograms(valid_sample, valid_labels, train_sample, train_labels,
                      train_weights, bins, args.output_dir)
    callbacks = callback(args.model_out, args.patience, args.metrics)
    if args.generator == 'ON':
        del train_sample  # free memory before launching the generator
        if train_weights is not None:
            train_weights = gen_weights(args.n_train, weight_idx,
                                        train_weights)
        print('\nLAUNCHING GENERATOR FOR',
              np.diff(args.n_train)[0], 'TRAINING SAMPLES')
        eval_gen = Batch_Generator(data_files,
                                   args.n_eval,
                                   input_data,
                                   args.n_tracks,
                                   args.n_classes,
                                   valid_batch_size,
                                   args.valid_cuts,
                                   scaler,
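
The callback helper used here (and in Example No. 3 below) is not part of the excerpt. A plausible minimal sketch, assuming it bundles the standard Keras checkpointing and early-stopping callbacks (the signature matches the calls in both examples; the body is an assumption):

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

def callback(model_out, patience, metrics):
    # Keep the best weights on disk and stop once the monitored metric
    # has not improved for `patience` epochs (sketch).
    checkpoint = ModelCheckpoint(model_out, monitor=metrics,
                                 save_best_only=True, verbose=1)
    early_stop = EarlyStopping(monitor=metrics, patience=patience,
                               restore_best_weights=True, verbose=1)
    return [checkpoint, early_stop]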
Example No. 3
                                                s_scaler)

    def separate_samples(sample):
        # Split the dict of arrays on the JZW flag: JZW != -1 selects the
        # QCD jets, JZW == -1 the "oe" (presumably outlier-exposure) jets.
        JZW = sample['JZW']
        qcd_sample = {key: val[JZW != -1] for key, val in sample.items()}
        oe_sample = {key: val[JZW == -1] for key, val in sample.items()}
        return qcd_sample, oe_sample

    train_sample, oe_train_sample = separate_samples(train_sample)
    valid_sample, oe_valid_sample = separate_samples(valid_sample)
    print('\nTrain sample:', format(len(train_sample['weights']), '8.0f'),
          'jets')
    print('Valid sample:', format(len(valid_sample['weights']), '8.0f'),
          'jets\n')
    print('Using TensorFlow', tf.__version__, 'with', n_gpus, 'GPU(s)\n')
    callbacks = callback(args.model_out, args.patience, 'val_loss')
    # For a stand-alone encoder
    if args.encoder in ['ae', 'vae']:
        sample_weights = train_sample['weights']
        if len(set(train_var) - {'constituents'}) == 0:
            # constituent-level inputs only
            train_X = train_sample['constituents']
            valid_X = valid_sample['constituents']
        elif 'constituents' not in train_var:
            # scalar inputs only
            train_X = train_sample['scalars']
            valid_X = valid_sample['scalars']
        else:
            # both input types: concatenate along the feature axis
            train_X = np.concatenate(
                [train_sample['constituents'], train_sample['scalars']],
                axis=1)
            valid_X = np.concatenate(
                [valid_sample['constituents'], valid_sample['scalars']],
                axis=1)
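
The dict-of-arrays split used by separate_samples works on any dict of equally sized NumPy arrays; a self-contained toy run (the field names mirror the snippet, the values are invented):

import numpy as np

sample = {'JZW':     np.array([0, 2, -1, 5, -1]),
          'weights': np.array([0.1, 0.4, 1.0, 0.7, 1.0])}

JZW = sample['JZW']
qcd_sample = {key: val[JZW != -1] for key, val in sample.items()}
oe_sample = {key: val[JZW == -1] for key, val in sample.items()}

print(len(qcd_sample['weights']), len(oe_sample['weights']))  # -> 3 2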