def train(self, embedding_size=128, window_size=5, workers=8, nb_epochs=50, **kwargs):
    """Fit a skip-gram Word2Vec model on ``self.walks`` and store the embeddings.

    Builds the gensim keyword arguments (DeepWalk setup: skip-gram with
    hierarchical softmax), trains the model, and replaces
    ``self.word_vectors`` with a plain ``{word: vector_as_list}`` dict.

    Note: uses the pre-gensim-4 parameter names (``size``, ``iter``,
    ``index2word``) — presumably the project pins gensim < 4; confirm.
    """
    # Caller-supplied min_count wins; every other key below is forced.
    kwargs.setdefault("min_count", 0)
    kwargs.update(
        sentences=self.walks,
        size=embedding_size,   # embedding dimensionality
        sg=1,                  # skip-gram
        hs=1,                  # DeepWalk uses Hierarchical Softmax
        workers=workers,
        window=window_size,
        iter=nb_epochs,
        compute_loss=True,     # so the callback can track training loss
        min_alpha=1e-2,
    )
    kwargs['callbacks'] = [callback(all_loss=[])]

    print("Learning embedding vectors...")
    self.word_vectors = Word2Vec(**kwargs).wv
    print("Learning embedding vectors done!")
    # Freeze the KeyedVectors into a plain dict: word -> list[float].
    self.word_vectors = dict(
        zip(self.word_vectors.index2word, self.word_vectors.vectors.tolist()))
verbose='OFF')
# NOTE(review): this chunk was recovered from a whitespace-mangled paste; the
# statement that the line above closes begins before this chunk, and the
# nesting chosen under the `if` headers below is inferred — confirm against
# the original script.

# Without the batch generator, the whole training sample is in memory:
# apply the track scaler to it directly.
if args.generator != 'ON':
    train_sample = apply_t_scaler(train_sample, t_scaler, verbose='ON')
sample_composition(train_sample)
compo_matrix(valid_labels, train_labels=train_labels)
print()
# Per-sample training weights binned in pt, per the CLI weighting scheme.
train_weights, bins = get_sample_weights(train_sample, train_labels, args.weight_type, args.bkg_ratio, hist='pt')
sample_histograms(valid_sample, valid_labels, train_sample, train_labels, train_weights, bins, args.output_dir)
#sys.exit()
# Keras training callbacks (model checkpointing / early stopping helper).
callbacks = callback(args.model_out, args.patience, args.metrics)
if args.generator == 'ON':
    # Generator mode streams batches from disk: release the in-memory copy.
    del (train_sample)
    # NOTE(review): `np.all(...) != None` is always True for a non-None
    # result; this likely intended `train_weights is not None` — flag only,
    # behavior left unchanged here.
    if np.all(train_weights) != None:
        train_weights = gen_weights(args.n_train, weight_idx, train_weights)
    print('\nLAUNCHING GENERATOR FOR', np.diff(args.n_train)[0], 'TRAINING SAMPLES')
    # This call's argument list continues into the next chunk
    # (closed there by `s_scaler)`).
    eval_gen = Batch_Generator(data_files, args.n_eval, input_data, args.n_tracks, args.n_classes, valid_batch_size, args.valid_cuts, scaler,
s_scaler)
# NOTE(review): chunk recovered from a whitespace-mangled paste; statement
# nesting below the `if` headers is inferred — confirm against the original.


def separate_samples(sample):
    """Split *sample* into ``(qcd_sample, oe_sample)`` on the JZW flag.

    Entries with ``JZW == -1`` form the outlier-exposure (OE) subset; all
    other entries form the QCD subset. Values are assumed to be numpy-style
    arrays supporting boolean-mask indexing — TODO confirm upstream.
    """
    JZW = sample['JZW']
    qcd_sample = {key: val[JZW != -1] for key, val in sample.items()}
    oe_sample = {key: val[JZW == -1] for key, val in sample.items()}
    return qcd_sample, oe_sample


train_sample, oe_train_sample = separate_samples(train_sample)
valid_sample, oe_valid_sample = separate_samples(valid_sample)
print('\nTrain sample:', format(len(train_sample['weights']), '8.0f'), 'jets')
print('Valid sample:', format(len(valid_sample['weights']), '8.0f'), 'jets\n')
print('Using TensorFlow', tf.__version__, 'with', n_gpus, 'GPU(s)\n')
# Stand-alone encoder monitors validation loss for checkpointing/patience.
callbacks = callback(args.model_out, args.patience, 'val_loss')
#FOR STAND-ALONE ENCODER
if args.encoder in ['ae', 'vae']:
    sample_weights = train_sample['weights']
    # Select the encoder inputs from the configured training variables:
    # constituents only, scalars only, or both concatenated feature-wise.
    if len(set(train_var) - {'constituents'}) == 0:
        train_X = train_sample['constituents']
        valid_X = valid_sample['constituents']
    elif 'constituents' not in train_var:
        train_X = train_sample['scalars']
        valid_X = valid_sample['scalars']
    else:
        train_X = np.concatenate(
            [train_sample['constituents'], train_sample['scalars']], axis=1)
        # This statement continues past the end of this chunk.
        valid_X = np.concatenate(
            [valid_sample['constituents'], valid_sample['scalars']],