コード例 #1
0
ファイル: test.py プロジェクト: HFooladi/trnet
def data_processing(dataset_dir):
    """Load every dataset split and auxiliary table from *dataset_dir*.

    Delegates to ``utils.data_processing`` using the file paths configured
    in the module-level ``parameters`` object, prints dataset statistics,
    and returns the loaded data.

    Returns:
        Tuple of (train, validation, OO, NO, ON, NN,
        pre_treatment, drug_fingerprint_info).
    """
    train, validation, OO, NO, ON, NN, \
    pre_treatment, drug_fingerprint_info \
    = utils.data_processing(dataset_dir,
          parameters.train_data_path,
          parameters.validation_data_path,
          parameters.OO_data_path,
          parameters.NO_data_path,
          parameters.ON_data_path,
          parameters.NN_data_path,
          parameters.pre_treatment_data_path,
          parameters.drug_fingerprint_data_path
          )
    # Print data statistics (the original used a no-op bare string literal
    # as a "comment" here; replaced with a real comment).
    utils.print_statistics(len(train), len(validation), len(OO), len(NO),
                           len(ON), len(NN), len(pre_treatment),
                           len(drug_fingerprint_info))

    return train, validation, OO, NO, ON, NN, \
    pre_treatment, drug_fingerprint_info
コード例 #2
0
def feature_selection(x_train, y_train, x_test, y_test):
    """Select the 5 best features via RFE over a linear SVM and evaluate.

    Fits recursive feature elimination on the training data, predicts on
    the test data, and prints evaluation statistics.
    """
    print("Feature selection with LinearSVC")
    model = LinearSVC(C=0.1, penalty='l2')
    # Pass n_features_to_select by keyword: in scikit-learn >= 0.24 it is
    # keyword-only, so the old positional form RFE(model, 5) raises a
    # TypeError there. The keyword form also works on older versions.
    rfe = RFE(model, n_features_to_select=5)
    best_features_model = rfe.fit(x_train, y_train)
    y_hat = best_features_model.predict(x_test)
    utils.print_statistics(y_test, y_hat)
コード例 #3
0
def grid_classifier(x_train, y_train, x_test, y_test, model, parameters,
                    make_feature_analysis=False, feature_names=None, top_features=0, plot_name="coeff"):
    """Grid-search *model* over *parameters* and evaluate the best estimator.

    Args:
        model: scikit-learn estimator to tune.
        parameters: param_grid mapping for GridSearchCV.
        make_feature_analysis: if True, plot the top coefficients of the
            best estimator using *feature_names* and *top_features*.
        plot_name: base name for the coefficient plot file.
    """
    grid = GridSearchCV(estimator=model, param_grid=parameters, verbose=0)
    grid.fit(x_train, y_train)
    # (Removed a dead `sorted(grid.cv_results_.keys())` statement whose
    # result was discarded.)
    classifier = grid.best_estimator_
    if make_feature_analysis:
        utils.plot_coefficients(classifier, feature_names, top_features, plot_name)
    y_hat = classifier.predict(x_test)
    utils.print_statistics(y_test, y_hat)
コード例 #4
0
ファイル: dl_models.py プロジェクト: qq345736500/sarcasm
def predict(model, x_test, y_test):
    """Predict class labels for *x_test* and print evaluation statistics.

    *model* must expose a Keras-style ``predict`` returning one probability
    vector per sample; the predicted class is the argmax of that vector.
    """
    prediction_probability = model.predict(x_test)
    print("Predicted probability length: ", len(prediction_probability))
    # Idiomatic rewrite of the original index-based loop that used an
    # unused unpacking target (`for i, (_) in enumerate(...)`) and then
    # re-indexed the sequence it was already iterating.
    y = [int(label) for label in y_test[:len(prediction_probability)]]
    y_pred = [np.argmax(probs) for probs in prediction_probability]
    utils.print_statistics(y, y_pred)
コード例 #5
0
def logistic_regression(x_train,
                        y_train,
                        x_test,
                        y_test,
                        class_ratio='balanced'):
    """Fit an L2-regularised logistic regression and report test metrics.

    Trains on (x_train, y_train) with C=0.01 and the given class weighting,
    predicts on x_test, and prints evaluation statistics.
    """
    utils.print_model_title("Logistic Regression")
    classifier = LogisticRegression(C=0.01,
                                    class_weight=class_ratio,
                                    penalty='l2')
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    utils.print_statistics(y_test, predictions)
コード例 #6
0
ファイル: rule_based.py プロジェクト: qq345736500/sarcasm
def rule_based_comparison(x_train,
                          y_train,
                          x_test,
                          y_test,
                          vocab_filename,
                          verbose=False):
    """Rule-based sarcasm baseline.

    Builds a vocabulary from the training tweets, weighs each word by the
    balance of sarcastic vs non-sarcastic tweets it appears in, and labels a
    test tweet sarcastic (1) when the summed weight of its known words is
    non-negative.  Prints evaluation statistics at the end.
    """
    # Build a vocabulary and count the sarcastic or non-sarcastic context in which a word appears
    vocab = data_proc.build_vocabulary(vocab_filename,
                                       x_train,
                                       minimum_occurrence=10)
    # counts[word] == [non-sarcastic occurrences, sarcastic occurrences]
    counts = {k: [0, 0] for k in vocab}
    for tw, y in zip(x_train, y_train):
        for word in tw.split():
            word = word.lower()
            if word in vocab:
                counts[word][0 if y == 0 else 1] += 1

    # Calculate the relative weight of each word, based on the sarcastic/non-sarcastic tweets that it appears
    # (simplified: dict.fromkeys takes the dict directly, no list comp needed)
    weight = dict.fromkeys(counts, 0)
    for word, (neg, pos) in counts.items():
        if neg + pos != 0:
            weight[word] = (pos - neg) / (pos + neg)

    if verbose:
        total_sarcastic = sum(1 for y in y_train if y == 1)
        stopwords = data_proc.get_stopwords_list()
        probs = {
            word: (counts[word][1] / total_sarcastic)
            for word in counts
            if word not in stopwords and word.isalnum()
        }
        print("Top 10 most sarcastic items: ",
              ' '.join(sorted(probs, key=probs.get, reverse=True)[:10]))

    # Rule-based predictions based on the previously calculated weights
    y_pred = []
    for tw, _ in zip(x_test, y_test):
        score = sum(weight[w] for w in (word.lower() for word in tw.split())
                    if w in vocab)
        # Ties (score == 0.0) are resolved towards the sarcastic class.
        y_pred.append(1 if score >= 0.0 else 0)
    utils.print_statistics(y_test, y_pred)
コード例 #7
0
ファイル: dl_models.py プロジェクト: qq345736500/sarcasm
def nn_bow_model(x_train,
                 y_train,
                 x_test,
                 y_test,
                 results,
                 mode,
                 epochs=15,
                 batch_size=32,
                 hidden_units=50,
                 save=False,
                 plot_graph=False):
    """Train and evaluate a one-hidden-layer bag-of-words neural network.

    Stores [loss, accuracy, f1] for *mode* into ``results`` (mutated in
    place), optionally plots training curves and saves the model to disk.

    Args:
        x_train, y_train: training features/labels (x_train is 2-D).
        x_test, y_test: evaluation features/labels.
        results: dict updated in place, keyed by *mode*.
        mode: name of the BoW feature mode; used in file/plot names.
        epochs, batch_size, hidden_units: training hyper-parameters.
        save: if True, persist architecture (JSON) and weights (HDF5).
        plot_graph: if True, plot training statistics.
    """
    # Build the model
    print("\nBuilding Bow NN model...")
    model = Sequential()
    model.add(
        Dense(hidden_units,
              input_shape=(x_train.shape[1], ),
              activation='sigmoid'))
    model.add(Dense(1, activation='sigmoid'))
    model.summary()

    # Train using binary cross entropy loss, Adam implementation of Gradient Descent
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', utils.f1_score])
    history = model.fit(x_train,
                        y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1)

    if plot_graph:
        utils.plot_training_statistics(history,
                                       "/plots/bow_models/bow_%s_mode" % mode)

    # Evaluate the model
    loss, acc, f1 = model.evaluate(x_test, y_test, batch_size=batch_size)
    results[mode] = [loss, acc, f1]
    # NOTE(review): Sequential.predict_classes was removed in TF >= 2.6;
    # if upgrading, replace with (model.predict(...) > 0.5).astype("int32").
    classes = model.predict_classes(x_test, batch_size=batch_size)
    y_pred = [item for c in classes for item in c]
    utils.print_statistics(y_test, y_pred)
    print("%d examples predicted correctly." %
          np.sum(np.array(y_test) == np.array(y_pred)))
    print("%d examples predicted 1." % np.sum(1 == np.array(y_pred)))
    print("%d examples predicted 0." % np.sum(0 == np.array(y_pred)))

    if save:
        json_name = path + "/models/bow_models/json_bow_" + mode + "_mode.json"
        # Bug fix: the weights file was previously given a ".json" extension
        # even though the weights are saved in HDF5 format.
        h5_weights_name = path + "/models/bow_models/h5_bow_" + mode + "_mode.h5"
        utils.save_model(model,
                         json_name=json_name,
                         h5_weights_name=h5_weights_name)
コード例 #8
0
async def consume_the_messages_stream_consumer():
    """Benchmark message consumption from the configured Kafka topic.

    Starts a stream consumer, consumes config.MESSAGE_NUMBER messages while
    timing the loop, then prints the elapsed time and statistics.
    """
    stream_consumer = Consumer(
        brokers=config.KAFKA_URL,
        topics=[config.TOPIC],
        rdk_consumer_config=config.RDK_CONSUMER_CONFIG,
        rdk_topic_config=config.RDK_TOPIC_CONFIG
    )
    stream_consumer.start()

    received = 0

    print("Starting to consume the messages.")
    with utils.Timer() as timer:
        async for _message in stream_consumer:
            received += 1
            # Stop once the target number of messages has been consumed.
            if received == config.MESSAGE_NUMBER:
                stream_consumer.stop()
    print(f"The time used to consume the messages is {timer.interval} "
          f"seconds.")
    utils.print_statistics(timer.interval)
コード例 #9
0
async def fill_topic_with_messages():
    """Fill the benchmark topic with config.MESSAGE_NUMBER messages.

    Starts a producer, sends the configured number of messages while timing
    the loop, then prints the elapsed time and statistics.
    """
    producer = Producer(
        brokers=config.KAFKA_URL,
        rdk_producer_config=config.RDK_PRODUCER_CONFIG,
        rdk_topic_config=config.RDK_TOPIC_CONFIG,
    )
    producer.start()

    print(f"Preparing benchmark. Filling topic  {config.TOPIC} with "
          f"{config.MESSAGE_NUMBER} messages of {config.MESSAGE_BYTES} bytes "
          f"each one.")
    await asyncio.sleep(0.1)

    with utils.Timer() as timer:
        # (Removed the misnamed and unused `messages_consumed` counter that
        # was only ever incremented inside this producer.)
        for _ in range(config.MESSAGE_NUMBER):
            await producer.produce(config.TOPIC, config.MESSAGE)
        producer.stop()
    print(f"The producer time to send the messages is {timer.interval} "
          f"seconds.")
    utils.print_statistics(timer.interval)
コード例 #10
0
ファイル: hmm_full_distort.py プロジェクト: iglesias/linal
	# NOTE(review): Python 2 fragment (print statements) from the body of a
	# function not visible here; `distort` and `evaluator` are defined by the
	# enclosing scope — confirm against the full file.
	### prepare training, test data and evaluator

	train_data_file = 'hmsvm_%d_distort_data_fold' % distort
	train_num_examples_fold = 20
	train_num_folds = 5
	train_labels, train_features = utils.unfold_data(train_data_file)

	test_data_file = 'hmsvm_%d_distort_data_test' % distort
	test_num_examples = 100
	test_labels, test_features = utils.read_mat_file(test_data_file, test_num_examples)

	### train ML-HMM and evaluate in training data

	# Maximum-likelihood HMM over a two-state shogun structured model.
	model = HMSVMModel(train_features, train_labels, SMT_TWO_STATE)
	model.set_use_plifs(True)
	mlhmm = MLHMM(model)
	mlhmm.train()

	# Evaluate on the training data itself (apply() with no argument).
	prediction = mlhmm.apply()
	accuracy = evaluator.evaluate(prediction, train_labels)
	print '\ttraining accuracy:\t' + str(accuracy*100) + '%'
	utils.print_statistics(train_labels, prediction)

	### evaluate in test data

	prediction = mlhmm.apply(test_features)
	accuracy = evaluator.evaluate(prediction, test_labels)
	print '\ttest accuracy:\t\t' + str(accuracy*100) + '%'
	utils.print_statistics(test_labels, prediction)
コード例 #11
0
ファイル: hmm_xval.py プロジェクト: vishalbelsare/netpred
			# NOTE(review): Python 2 fragment starting mid-loop; the loop
			# variables and the *_no_kfold containers come from code above
			# this view — confirm against the full file.
			assert(labels_k_fold.get_num_labels() == features_k_fold.get_num_vectors())

			# Copy every label/feature vector of this fold into the
			# "all but fold k" containers.
			for i in xrange(labels_k_fold.get_num_labels()):
				labels_no_kfold.add_label(labels_k_fold.get_label(i))
				features_no_kfold.set_feature_vector(features_k_fold.get_feature_vector(i), idx)
				idx += 1

	labels_no_fold.append(labels_no_kfold)
	features_no_fold.append(features_no_kfold)

### training and validation

evaluator = StructuredAccuracy()
evaluator.io.set_loglevel(MSG_DEBUG)
accuracies = []

print 'training HMM'
# K-fold cross-validation: train on everything except fold k, evaluate on
# fold k's features/labels (held in models[k]).
for k in xrange(K):
	model = HMSVMModel(features_no_fold[k], labels_no_fold[k], SMT_TWO_STATE)
	model.set_use_plifs(True)
	hmm = MLHMM(model)
	print '\ton fold %d' % k,
	hmm.train()
	prediction = hmm.apply(models[k].get_features())
	accuracy = evaluator.evaluate(prediction, models[k].get_labels())
	print str(accuracy*100) + '%'
	utils.print_statistics(models[k].get_labels(), prediction)
	accuracies.append(accuracy)

print 'overall success rate of ' + str(numpy.mean(accuracies)*100) + '%'
コード例 #12
0
            # NOTE(review): fragment of a larger ping routine; `thread`,
            # `threads`, `ping_socket` and `lock` are created above this
            # view — confirm against the full file.
            # Thread id truncated to 16 bits is reused as the ICMP id field.
            threads.append((id(thread) & 0xFFFF, event, thread))
            thread.setDaemon(True)
            thread.start()

        # Dispatch incoming ICMP replies to worker threads until all finish.
        while(any(thread[2].isAlive() == True for thread in threads)):

            # Wait up to 1s for the socket to become readable.
            ready = select.select([ping_socket], [], [], 1)
            if not ready[0]:
                continue

            try:
                with lock:
                    recPacket, addr = ping_socket.recvfrom(2048)
            except socket.error:
                print("Can't receive")

            # ICMP header sits after the 20-byte IP header.
            # (`type` shadows the builtin — left unchanged in this doc pass.)
            icmp_header = recPacket[20:28]
            type, code, checksum, port, sequence = struct.unpack('!BBHHH', icmp_header)

            # Match the reply's id field against the registered thread ids.
            index = [index for index, thread in enumerate(threads) if thread[0] == port]

            # type == 0 is an ICMP echo reply.
            if type == 0 and index:
                threads[index[0]][2].add_received_packet(recPacket)
                threads[index[0]][1].set()

        print_statistics(threads)
        [thread.join() for id, event, thread in threads]
        ping_socket.close()
コード例 #13
0
def linear_svm(x_train, y_train, x_test, y_test, class_ratio='balanced'):
    """Fit an L2-regularised linear SVM and report test metrics.

    Trains on (x_train, y_train) with C=0.01 and the given class weighting,
    predicts on x_test, and prints evaluation statistics.
    """
    utils.print_model_title("Linear SVM")
    classifier = LinearSVC(C=0.01, class_weight=class_ratio, penalty='l2')
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    utils.print_statistics(y_test, predictions)
コード例 #14
0
    # NOTE(review): fragment starting inside a per-document loop;
    # `final_tokens`, `doc_no`, `doc_num`, `index_list`, `doc_info` and
    # `word_dist` come from code above this view — confirm against the file.
    doc_size = len(final_tokens)
    doc_id = "D" + str(doc_no)
    # Record each token's 1-based position under its inverted-index entry.
    for i in range(len(final_tokens)):
        pos = i + 1
        word = final_tokens[i]
        #doc_id = "D" + str(doc_no)
        index_found = 0
        # Linear scan for an existing entry for this word.
        for index in range(len(index_list)):
            if index_list[index][0] == word:
                item = index_list[index][1]
                item.append([str(doc_id), pos])
                index_list[index][1] = item
                index_found = 1
                break
        if index_found == 0:
            index_list.append([word, [[str(doc_id), pos]]])
    doc_info[doc_id] = (doc_num, doc_size)

# Persist the sorted index and the per-document info as JSON.
index_list.sort(key=itemgetter(0, 1))
with open('indexA.json', 'w') as outfile:
    json.dump(index_list, outfile)

with open('docinfoA.json', 'w') as outfile:
    json.dump(doc_info, outfile)

# word_dist[word] = number of postings for that word.
for item in index_list:
    word_dist[str(item[0])] = len(item[1])

index_size = len(index_list)
utils.print_statistics(doc_no, word_dist, index_size)
コード例 #15
0
	# NOTE(review): tab-indented duplicate of the index-building fragment
	# above; starts inside a per-document loop — variables come from code
	# above this view. Original tab/space mix preserved.
	doc_size = len(final_tokens)	
	doc_id = "D" + str(doc_no)		
	# Record each token's 1-based position under its inverted-index entry.
	for i in range(len(final_tokens)):
		pos = i+1
		word = final_tokens[i]
		#doc_id = "D" + str(doc_no)
		index_found = 0
		# Linear scan for an existing entry for this word.
		for index in range(len(index_list)):
			if index_list[index][0]==word:
				item = index_list[index][1]
				item.append([str(doc_id), pos])
				index_list[index][1] = item
				index_found = 1
				break
		if index_found == 0:
			index_list.append([word,[[str(doc_id),pos]]])
	doc_info[doc_id] = (doc_num, doc_size)

# Persist the sorted index and the per-document info as JSON.
index_list.sort(key=itemgetter(0,1))
with open('indexA.json', 'w') as outfile:
    json.dump(index_list, outfile)

with open('docinfoA.json', 'w') as outfile:
    json.dump(doc_info, outfile)

# word_dist[word] = number of postings for that word.
for item in index_list:
	word_dist[str(item[0])] = len(item[1])

index_size = len(index_list)
utils.print_statistics(doc_no, word_dist, index_size)
コード例 #16
0
# NOTE(review): Python 2 shogun script fragment; train/test features and
# labels are loaded above this view — confirm against the full file.
evaluator = StructuredAccuracy()

### train ML-HMM and evaluate in training data

print 'training ML-HMM'
# Maximum-likelihood HMM over a two-state shogun structured model.
model = HMSVMModel(train_features, train_labels, SMT_TWO_STATE)
model.set_use_plifs(True)
mlhmm = MLHMM(model)
mlhmm.train()
'''
print '\n\tmodel parameters:'
print '\t- transition scores: ' + str(numpy.exp(mlhmm.transition_scores))
print '\t- feature scores:'
for s,f in product(xrange(mlhmm.num_free_states), xrange(mlhmm.num_features)):
	print '\t\tstate %d feature %d:\n%s' % (s, f, str(numpy.exp(mlhmm.feature_scores[f,s,:])))
'''

# Evaluate on the training data itself (apply() with no argument).
prediction = mlhmm.apply()
accuracy = evaluator.evaluate(prediction, train_labels)
print '\n\ttraining accuracy: ' + str(accuracy * 100) + '%'
utils.print_statistics(train_labels, prediction)

### evaluate in test data

print 'testing ML-HMM'
prediction = mlhmm.apply(test_features)
accuracy = evaluator.evaluate(prediction, test_labels)
print '\ttest accuracy: ' + str(accuracy * 100) + '%'
utils.print_statistics(test_labels, prediction)
コード例 #17
0
def trainer(cfg: DictConfig) -> None:
    """Train a trajectory-prediction GAN on l5kit data.

    Builds train/validation loaders from masked AgentDatasets, then runs an
    alternating discriminator/generator training loop supporting 'vanilla',
    'wasserstein' and 'wasserstein_gp' objectives.  Periodically logs
    statistics, evaluates on the validation set and saves checkpoints.

    Args:
        cfg: hydra/omegaconf configuration holding l5kit, GAN, loss and
            training parameters.
    """
    os.environ["L5KIT_DATA_FOLDER"] = cfg.l5kit_data_folder
    dm = LocalDataManager(None)

    logger = logging.getLogger(__name__)

    logger.info("Working directory : {}".format(os.getcwd()))

    logger.info("Load dataset...")

    train_cfg = cfg["train_data_loader"]
    valid_cfg = cfg["valid_data_loader"]

    # rasterizer
    rasterizer = build_rasterizer(cfg, dm)

    train_path = train_cfg["key"]
    train_zarr = ChunkedDataset(dm.require(train_path)).open(cached=False)

    logger.info(f"train_zarr {type(train_zarr)}")

    # loading custom mask (we mask static agents)
    logger.info(f"Loading mask in path {train_cfg['mask_path']}")
    custom_mask = np.load(train_cfg['mask_path'])
    logger.info(f"Length of training mask is: {custom_mask.sum()}")

    train_agent_dataset = AgentDataset(cfg, train_zarr, rasterizer, agents_mask=custom_mask)

    # transform dataset to the proper frame of reference
    train_dataset = TransformDataset(train_agent_dataset, cfg)

    if train_cfg['subset'] != -1:
        train_dataset = Subset(train_dataset, np.arange(train_cfg['subset']))

    train_loader = DataLoader(train_dataset,
                              shuffle=train_cfg["shuffle"],
                              batch_size=train_cfg["batch_size"],
                              num_workers=train_cfg["num_workers"])

    logger.info(train_agent_dataset)

    # loading custom mask for validation dataset
    logger.info(f"Loading val mask in path {valid_cfg['mask_path']}")
    val_custom_mask = np.load(valid_cfg['mask_path'])
    logger.info(f"Length of validation mask is: {val_custom_mask.sum()}")

    valid_path = valid_cfg["key"]
    valid_zarr = ChunkedDataset(dm.require(valid_path)).open(cached=False)

    # Bug fix: previously logged type(train_zarr) here.
    logger.info(f"valid_zarr {type(valid_zarr)}")

    valid_agent_dataset = AgentDataset(cfg, valid_zarr, rasterizer, agents_mask=val_custom_mask)

    # transform validation dataset to the proper frame of reference
    valid_dataset = TransformDataset(valid_agent_dataset, cfg)

    if valid_cfg['subset'] != -1:
        # Bug fix: Subset expects a sequence of indices; the train branch
        # correctly used np.arange, but this branch passed the raw int.
        valid_dataset = Subset(valid_dataset, np.arange(valid_cfg['subset']))

    valid_loader = DataLoader(
        valid_dataset,
        shuffle=valid_cfg["shuffle"],
        batch_size=valid_cfg["batch_size"],
        num_workers=valid_cfg["num_workers"]
    )

    logger.info(valid_agent_dataset)
    logger.info(f"# Full AgentDataset train: {len(train_agent_dataset)} #valid: {len(valid_agent_dataset)}")
    logger.info(f"# Actual AgentDataset train: {len(train_dataset)} #valid: {len(valid_dataset)}")

    n_epochs = cfg['train_params']['num_epochs']

    # how many discriminator / generator updates per alternation cycle
    d_steps = cfg['train_params']['num_d_steps']
    g_steps = cfg['train_params']['num_g_steps']

    noise_dim = cfg['gan_params']['noise_dim']
    g_learning_rate = cfg['train_params']['g_learning_rate']
    d_learning_rate = cfg['train_params']['d_learning_rate']

    if cfg['gan_params']['gan_type'] == 'vanilla':
        cross_entropy = nn.BCELoss()

    generator = Generator(input_dim=cfg['gan_params']['input_dim'],
                          embedding_dim=cfg['gan_params']['embedding_dim'],
                          decoder_dim=cfg['gan_params']['decoder_dim'],
                          trajectory_dim=cfg['model_params']['future_num_frames'],
                          noise_dim=noise_dim,
                          backbone_type=cfg['gan_params']['backbone_type'],
                          embedding_type=cfg['gan_params']['embedding_type']
                          )

    generator.to(cfg['device'])
    generator.train()  # train mode

    W = cfg['raster_params']['raster_size'][0]
    discriminator = Discriminator(width=W,
                                  h_0=cfg['raster_params']['ego_center'][0]*W,
                                  w_0=cfg['raster_params']['ego_center'][1]*W,
                                  r=cfg['raster_params']['pixel_size'][0],
                                  sigma=cfg['gan_params']['sigma'],
                                  channels_num=cfg['model_params']['future_num_frames']+3,
                                  num_disc_feats=cfg['gan_params']['num_disc_feats'],
                                  input_dim=cfg['gan_params']['input_dim'],
                                  device=cfg['device'],
                                  gan_type=cfg['gan_params']['gan_type'],
                                  embedding_type=cfg['gan_params']['embedding_type'],
                                  lstm_embedding_dim=cfg['gan_params']['embedding_dim']
                                  )

    discriminator.to(cfg['device'])
    discriminator.apply(weights_init)
    discriminator.train()  # train mode

    # Wasserstein variants use the optimizers recommended by their papers.
    if cfg['gan_params']['gan_type'] == 'wasserstein':
        optimizer_g = optim.RMSprop(generator.parameters(), lr=g_learning_rate)
        optimizer_d = optim.RMSprop(discriminator.parameters(), lr=d_learning_rate)
    elif cfg['gan_params']['gan_type'] == 'wasserstein_gp':
        betas = (0.0, 0.9)
        optimizer_g = optim.Adam(generator.parameters(), lr=g_learning_rate, betas=betas)
        optimizer_d = optim.Adam(discriminator.parameters(), lr=d_learning_rate, betas=betas)
    else:
        optimizer_g = optim.Adam(generator.parameters(), lr=g_learning_rate)
        optimizer_d = optim.Adam(discriminator.parameters(), lr=d_learning_rate)

    d_steps_left = d_steps
    g_steps_left = g_steps

    # variables for statistics
    d_full_loss = []
    g_full_loss = []
    gp_values = []
    l2_variety_values = []
    metric_vals = []

    # checkpoint dictionary
    checkpoint = {
        'G_losses': defaultdict(list),
        'D_losses': defaultdict(list),
        'counters': {
            't': None,
            'epoch': None,
        },
        'g_state': None,
        'g_optim_state': None,
        'd_state': None,
        'd_optim_state': None
    }

    id_batch = 0

    # total number of batches
    len_of_epoch = len(train_loader)

    for epoch in range(n_epochs):
        for batch in train_loader:
            batch = [tensor.to(cfg['device']) for tensor in batch]

            # Creates single raster image from sequence of images from l5kit's AgentDataset
            batch[0] = f_get_raster_image(cfg=cfg,
                                          images=batch[0],
                                          history_weight=cfg['model_params']['history_fading_weight'])

            (image, target_positions, target_availabilities,
             history_positions, history_yaws, centroid, world_to_image) = batch

            actor_state = (history_positions, history_yaws)

            batch_size = image.shape[0]

            # noise for generator
            noise = torch.normal(size=(batch_size, noise_dim),
                                 mean=0.0,
                                 std=1.0,
                                 dtype=torch.float32,
                                 device=cfg['device'])

            #######################################
            #       TRAIN DISCRIMINATOR
            #######################################

            # train discriminator (d_steps_left) times (using different batches)
            # train generator (g_steps_left) times (using different batches)

            if d_steps_left > 0:
                d_steps_left -= 1

                for pd in discriminator.parameters():  # reset requires_grad
                    pd.requires_grad = True  # they are set to False below in generator update

                # freeze generator while training discriminator
                for pg in generator.parameters():
                    pg.requires_grad = False

                discriminator.zero_grad()

                # generate fake trajectories (batch_size, target_size, 2) for current batch
                fake_trajectory = generator(image, actor_state, noise)

                # discriminator predictions (batch_size, 1) on real and fake trajectories
                d_real_pred = discriminator(target_positions, image, actor_state)
                d_g_pred = discriminator(fake_trajectory, image, actor_state)

                # loss
                if cfg['gan_params']['gan_type'] == 'vanilla':
                    # tensor with true/fake labels of size (batch_size, 1)
                    real_labels = torch.full((batch_size,), 1, dtype=torch.float, device=cfg['device'])
                    fake_labels = torch.full((batch_size,), 0, dtype=torch.float, device=cfg['device'])

                    real_loss = cross_entropy(d_real_pred, real_labels)
                    fake_loss = cross_entropy(d_g_pred, fake_labels)

                    total_loss = real_loss + fake_loss
                elif cfg['gan_params']['gan_type'] == 'wasserstein':  # D(fake) - D(real)
                    total_loss = torch.mean(d_g_pred) - torch.mean(d_real_pred)
                elif cfg['gan_params']['gan_type'] == 'wasserstein_gp':
                    gp_loss = gradient_penalty(discrim=discriminator,
                                               real_trajectory=target_positions,
                                               fake_trajectory=fake_trajectory,
                                               in_image=image,
                                               in_actor_state=actor_state,
                                               lambda_gp=cfg['losses']['lambda_gp'],
                                               device=cfg['device'])

                    total_loss = torch.mean(d_g_pred) - torch.mean(d_real_pred) + gp_loss
                else:
                    raise NotImplementedError

                # calculate gradients for this batch
                total_loss.backward()
                optimizer_d.step()

                # weight clipping for discriminator in pure Wasserstein GAN
                if cfg['gan_params']['gan_type'] == 'wasserstein':
                    c = cfg['losses']['weight_clip']
                    for p in discriminator.parameters():
                        p.data.clamp_(-c, c)

                d_full_loss.append(total_loss.item())

                if cfg['gan_params']['gan_type'] == 'wasserstein_gp':
                    gp_values.append(gp_loss.item())

            #######################################
            #         TRAIN GENERATOR
            #######################################

            elif g_steps_left > 0:  # we either train generator or discriminator on current batch
                g_steps_left -= 1

                for pd in discriminator.parameters():
                    pd.requires_grad = False  # avoid discriminator training

                # unfreeze generator
                for pg in generator.parameters():
                    pg.requires_grad = True

                generator.zero_grad()

                if cfg['losses']['use_variety_l2']:
                    l2_variety_loss, fake_trajectory = l2_loss_kmin(traj_real=target_positions,
                                                                    generator_=generator,
                                                                    image=image,
                                                                    actor_state=actor_state,
                                                                    cfg=cfg,
                                                                    kmin=cfg['losses']['k_min'],
                                                                    return_best_traj=True)
                else:
                    fake_trajectory = generator(image, actor_state, noise)

                d_g_pred = discriminator(fake_trajectory, image, actor_state)

                if cfg['gan_params']['gan_type'] == 'vanilla':
                    # while training generator we associate generated fake examples
                    # with real labels in order to measure generator quality
                    real_labels = torch.full((batch_size,), 1, dtype=torch.float, device=cfg['device'])
                    fake_loss = cross_entropy(d_g_pred, real_labels)
                elif cfg['gan_params']['gan_type'] in ['wasserstein', 'wasserstein_gp']:  # -D(fake)
                    fake_loss = -torch.mean(d_g_pred)
                else:
                    raise NotImplementedError

                if cfg['losses']['use_variety_l2']:
                    fake_loss += cfg['losses']['weight_variety_l2'] * l2_variety_loss

                    l2_variety_values.append(l2_variety_loss.item())

                fake_loss.backward()
                optimizer_g.step()

                g_full_loss.append(fake_loss.item())

            # renew d_steps_left, g_steps_left at the end of full discriminator-generator training cycle
            if d_steps_left == 0 and g_steps_left == 0:
                d_steps_left = d_steps
                g_steps_left = g_steps

            # print current model state on train dataset
            if (id_batch > 0) and (id_batch % cfg['train_params']['print_every_n_steps'] == 0):

                print_statistics(logger=logger,
                                 cfg=cfg,
                                 epoch=epoch,
                                 len_of_epoch=len_of_epoch,
                                 id_batch=id_batch,
                                 d_full_loss=d_full_loss,
                                 g_full_loss=g_full_loss,
                                 gp_values=gp_values,
                                 l2_variety_values=l2_variety_values,
                                 print_over_n_last=1000)

                # save rasterized image of 0th element of current batch
                plot_traj_on_map(cfg, 0, batch, generator, save_name=str(id_batch),
                                 save_directory=cfg['train_params']['image_sample_dir'])

            # Save checkpoint and evaluate the model
            if (id_batch > 0) and (id_batch % cfg['train_params']['checkpoint_every_n_steps'] == 0):
                checkpoint['counters']['t'] = id_batch
                checkpoint['counters']['epoch'] = epoch

                # Check stats on the validation set
                logger.info('Checking stats on val ...')
                metrics_val = evaluate(cfg, generator, valid_loader)
                metric_vals.append(metrics_val)

                with open('metric_vals_list.pkl', 'wb') as handle:
                    pickle.dump(metric_vals, handle, protocol=pickle.HIGHEST_PROTOCOL)

                for k, v in sorted(metrics_val.items()):
                    logger.info('  [val] {}: {:.3f}'.format(k, v))

                checkpoint['g_state'] = generator.state_dict()
                checkpoint['g_optim_state'] = optimizer_g.state_dict()
                checkpoint['d_state'] = discriminator.state_dict()
                checkpoint['d_optim_state'] = optimizer_d.state_dict()
                checkpoint_path = os.path.join(os.getcwd(), f"{cfg['model_name']}_{id_batch}.pt")
                logger.info('Saving checkpoint to {}'.format(checkpoint_path))
                torch.save(checkpoint, checkpoint_path)
                logger.info('Done.')

                results_df, metric_df = get_results_plot(d_full_loss,
                                                         g_full_loss,
                                                         metric_vals,
                                                         train_window_size=100,
                                                         val_window_size=10,
                                                         is_save=True)

                results_df.to_excel('results.xlsx', index=False)
                metric_df.to_excel('val_metrics.xlsx', index=False)

            id_batch = id_batch + 1
コード例 #18
0
ファイル: tss_analysis.py プロジェクト: iglesias/tests
    # NOTE(review): Python 2 fragment starting inside a loop over sequences;
    # `seq_limits`, `state_seq`, `feat_mat`, `labels` and `features` come
    # from code above this view — confirm against the full file.
    lo, hi = seq_limits[i]
    labels.add_vector_label(state_seq[lo:hi + 1])
    features.set_feature_vector(feat_mat[:, lo:hi + 1], i)

print 'num_labels=%d' % labels.get_num_labels()
print 'num_states=%d' % labels.get_num_states()
print 'num_features=%d' % features.get_num_features()
print 'num_vectors=%d' % features.get_num_vectors()

# Two-state hidden-Markov SVM model over the assembled features/labels.
model = HMSVMModel(features, labels, SMT_TWO_STATE)
model.set_use_plifs(True)
# sosvm = DualLibQPBMSOSVM(model, labels, 5000.0)
# sosvm = StochasticSOSVM(model, labels)
# sosvm.set_lambda(1)
# sosvm.set_verbose(True)
hinge_loss = HingeLoss()
sosvm = PrimalMosekSOSVM(model, labels)
sosvm.set_regularization(50)
sosvm.io.set_loglevel(MSG_DEBUG)

print 'Training SO-SVM...'
sosvm.train()
print '\tdone!'
print sosvm.get_w()

# Evaluate on the training data itself.
predicted = sosvm.apply(model.get_features())
evaluator = StructuredAccuracy()
acc = evaluator.evaluate(predicted, model.get_labels())
print 'Training accuracy = %.4f' % acc
utils.print_statistics(labels, predicted)