Example #1
    def train_base_model(self,
                         data,
                         latent_size=20,
                         iterations=100,
                         lamda=0.05,
                         gamma=0.05,
                         verbose=True):
        '''
        Train an SVD model - the main function of this class. Runs the learning iterations
        with gradient descent, as described in the exercise instructions

        :param data: the data-frame to learn from. Should hold 3 columns - UserID, ItemID and Rank
        :param latent_size: number of latent features to create. Default=20. Must be a positive integer
        :param iterations: maximum number of iterations to run. The algorithm can stop earlier (if the RMSE rises
                between iterations). Default=100. Must be a positive integer
        :param lamda: the lambda parameter in the algorithm, controls the learning rate.
                Default=0.05. Must be a positive number smaller than 1
        :param gamma: the gamma parameter in the algorithm, controls the regularization strength.
                Default=0.05. Must be a positive number smaller than 1
        :param verbose: Boolean, whether to print progress along the run
        :return: nothing, everything is stored in the class object
        '''

        start_time = datetime.now()
        # splitting into train and validation (validation is used as an internal dataset to avoid overfitting)
        train_data, validation_data = train_test_split(data,
                                                       test_size=0.3,
                                                       random_state=42)

        # calculating important values and saving them in the object
        self._users = set(train_data["UserID"])
        self._items = set(train_data["ItemID"])
        self._users_avg = data.groupby("UserID")["Rank"].mean().to_dict()
        self._items_avg = data.groupby("ItemID")["Rank"].mean().to_dict()
        self._user_item_avg = np.average(data["Rank"])

        # setting random initial values for the model's parameters
        b_u = np.random.uniform(-0.1, 0.1, size=len(self._users))
        b_i = np.random.uniform(-0.1, 0.1, size=len(self._items))
        p_u = np.random.uniform(-0.1,
                                0.1,
                                size=(len(self._users), latent_size))
        q_i = np.random.uniform(-0.1,
                                0.1,
                                size=(len(self._items), latent_size))
        avg = np.average(train_data["Rank"])
        self._average = avg

        # converting some to data-frames (easier to save and handle later)
        p_u_df = pd.DataFrame(p_u,
                              index=list(self._users),
                              columns=range(0, latent_size))
        q_i_df = pd.DataFrame(q_i,
                              index=list(self._items),
                              columns=range(0, latent_size))
        b_u_df = pd.DataFrame(b_u, index=list(self._users))
        b_i_df = pd.DataFrame(b_i, index=list(self._items))

        # building the evaluation object to be used inside the loop
        eval_obj = evaluation.Evaluation()
        # starting the gradient descent phase; the loop can stop before we reach 'iterations' cycles
        for i in xrange(iterations):
            # start_time = datetime.now()
            # looping over each row in the dataset (user/item/rank) and updating the params
            for index, row in train_data.iterrows():
                cur_user = row["UserID"]
                cur_item = row["ItemID"]
                cur_rank = row["Rank"]
                dot_prod = sum(p_u_df.loc[cur_user] * q_i_df.loc[cur_item])
                cur_error = float(cur_rank - avg - b_u_df.loc[cur_user] -
                                  b_i_df.loc[cur_item] - dot_prod)
                b_u_df.loc[cur_user] += lamda * (cur_error -
                                                 gamma * b_u_df.loc[cur_user])
                b_i_df.loc[cur_item] += lamda * (cur_error -
                                                 gamma * b_i_df.loc[cur_item])
                # cache the item's latent vector so both factor updates use pre-update values
                q_i_old = q_i_df.loc[cur_item].copy()
                q_i_df.loc[cur_item] += lamda * (
                    cur_error * p_u_df.loc[cur_user] -
                    gamma * q_i_df.loc[cur_item])
                p_u_df.loc[cur_user] += lamda * (
                    cur_error * q_i_old -
                    gamma * p_u_df.loc[cur_user])
            # end of the inner loop; now calculate the errors and decide whether to run another loop
            self._full_pred_matrix = p_u_df.dot(q_i_df.transpose())
            self._b_i = b_i_df
            self._b_u = b_u_df
            validation_pred = self.predict(new_data=validation_data)
            eval_obj.evaluate(true_ranks=list(validation_data["Rank"]),
                              predicted_ranks=validation_pred)
            # duration = (datetime.now() - start_time).seconds
            # print "Loop number {}, the RMSE is {}," \
            #       "this loop took us {} minutes".format(i, eval_obj.rmse, duration/60.0)
            # if the RMSE improved, save it and go for another loop
            if eval_obj.rmse < self.rmse:
                self.rmse = eval_obj.rmse
            # otherwise stop the algorithm - the RMSE did not improve
            else:
                train_pred = self.predict(new_data=train_data)
                eval_obj.evaluate(true_ranks=list(train_data["Rank"]),
                                  predicted_ranks=train_pred)
                duration = (datetime.now() - start_time).seconds
                print "SVD model finished, took us {} loops and {} minutes." \
                      " RMSE measure in the train dataset is {}" \
                      " MAE measure in the train dataset is {}".format(i, duration/60.0, eval_obj.rmse, eval_obj.mae)
                break
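The inner loop above is the classic Funk-SVD stochastic update. For reference, a minimal NumPy sketch of the same step, assuming users and items have been re-indexed to the integers 0..n-1 (all names here are illustrative, not part of the class above):

import numpy as np

def sgd_epoch(ratings, mu, b_u, b_i, P, Q, lamda=0.05, gamma=0.05):
    """One pass over (user, item, rank) triples; updates the parameters in place."""
    for u, i, r in ratings:
        err = r - (mu + b_u[u] + b_i[i] + P[u].dot(Q[i]))
        b_u[u] += lamda * (err - gamma * b_u[u])
        b_i[i] += lamda * (err - gamma * b_i[i])
        p_old = P[u].copy()  # keep the pre-update user vector for the item update
        P[u] += lamda * (err * Q[i] - gamma * P[u])
        Q[i] += lamda * (err * p_old - gamma * Q[i])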
Example #2
def main():
    valid_categories = [
        'active', 'homeservices', 'hotelstravel', 'nightlife', 'pets',
        'restaurants', 'shopping', 'all'
    ]
    # region 1. Configurations (USER MUST DEFINE THESE)
    # must be out of the following: active, homeservices, hotelstravel, nightlife, pets, restaurants, shopping, all
    cur_category = 'pets'
    data_path = "C:\\Users\\abrahami\\Documents\\Private\\Uni\\BGU\\Reco.Systems - Bracha\\HW1\\Yelp"
    model = 'svd'  # should be one out of svd/cb/hybrid

    # input validity check
    if cur_category not in valid_categories:
        raise IOError("category provided is invalid")
    if cur_category == 'all' and model != 'svd':
        raise IOError(
            "modeling based on all categories is possible only with the SVD model. To model all categories with CB "
            "or hybrid, you need to loop over all possible categories and combine the results"
        )
    # endregion

    # region 2. Data load
    loader_obj = loader.Loader(path=data_path)
    if cur_category != 'all':
        data = loader_obj.load(category=cur_category,
                               d_train_all_categories=False)
    else:
        data = loader_obj.load(d_train_all_categories=True)

    train = data['train_data']
    test = data['test_data']
    print "Loading has just finished, shape of the train dataset is {}," \
          " shape of the test dataset is {}.\n" \
          "There are {} distinct users and {} distinct items in the training dataset." \
          " There are {} distinct users and {} distinct items in the test dataset\n".\
        format(train.shape, test.shape, len(train["UserID"].unique()), len(train["ItemID"].unique()),
               len(test["UserID"].unique()), len(test["ItemID"].unique()))
    # endregion

    # region 3. Modeling
    if model == 'svd':
        svd_obj = svd.SVDRecommender()
        svd_obj.train_base_model(data=train,
                                 latent_size=5,
                                 iterations=10,
                                 gamma=0.01,
                                 lamda=0.01)
        test_prediction = svd_obj.predict(new_data=test)

    if model == 'cb':
        cb_obj = content_based.ContentBasedRecommender(gamma=0.5)
        items_path = data_path + "\\" + cur_category + "\\" + "items.txt"
        cb_obj.train_tf_idf(path=items_path)
        test_prediction = cb_obj.predict(new_data=test, train_data=train)

    if model == 'hybrid':
        hybrid_obj = hybrid.HybridModeling()
        items_path = data_path + "\\" + cur_category + "\\" + "items.txt"
        hybrid_obj.train_hybrid(train_data=train, items_path=items_path)
        test_prediction = hybrid_obj.predict(new_data=test, train_data=train)
    # endregion

    # region 4. Evaluation
    eval_obj = evaluation.Evaluation(smart_rounding=True)
    eval_obj.evaluate(true_ranks=list(test["Rank"]),
                      predicted_ranks=test_prediction)
    print "\n{} model results: the RMSE of the test data-set is {}, the MAE is {}". \
        format(model, eval_obj.rmse, eval_obj.mae)
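For reference, the two error measures printed above reduce to the following (a minimal sketch over two equal-length lists):

def rmse(y_true, y_pred):
    return (sum((t - p) ** 2 for t, p in zip(y_true, y_pred)) / len(y_true)) ** 0.5

def mae(y_true, y_pred):
    return sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)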
Example #3
import sys
import numpy as np

sys.path.insert(0, 'class')
import model
import pathconfig
import preprocessing_images
import evaluation

'''
Script to evaluate the Transfer Learning network
'''


paths = pathconfig.paths()


Model = model.Model()
Evaluation = evaluation.Evaluation()
model_Transfer_Learning = model.Transfer_Learning()       # Transfer Learning model
path_test_set = paths.PATH_IMAGES_BLIND_TEST              # the dataset chosen for evaluation; to change it, just
                                                          # change the dataset path (paths.PATH_IMAGES_TEST)
path_model = model_Transfer_Learning.path_model
path_weights = model_Transfer_Learning.path_weights

model = Model.load_model(path_model,path_weights)

Preprocessing_Images = preprocessing_images.Preprocessing_Images()
test_datagen, val_steps, classnames = Preprocessing_Images.get_set_from_path(setdata=path_test_set)

predictions = model.predict_generator(test_datagen,verbose=1,steps=val_steps)

Ypred = np.argmax(predictions, axis=1)  # predicted labels
Ytest = test_datagen.classes            # true labels of the test set
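With Ypred and Ytest in hand, standard classification metrics follow directly. A short sketch using scikit-learn (assuming it is available; the project's Evaluation class may compute its own metrics):

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print("Accuracy:", accuracy_score(Ytest, Ypred))
print(confusion_matrix(Ytest, Ypred))
print(classification_report(Ytest, Ypred, target_names=classnames))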
Example #4
    def evaluation(self):
        self.table_ev_results.setRowCount(0)
        self.completed = 0
        algorithms = [0, 0, 0, 0]
        if self.check_ev_alg_jaccarda.isChecked():
            algorithms[0] = 1
        if self.check_ev_alg_f1_score.isChecked():
            algorithms[1] = 1
        if self.check_ev_alg_evs.isChecked():
            algorithms[2] = 1
        if self.check_ev_alg_mse.isChecked():
            algorithms[3] = 1
        ground_true_folder = self.line_ev_true_folder.text()
        true_segmentation_paths = self.loadImagesPathsInFolder(ground_true_folder, [".bmp"])
        predicted_folder = self.line_ev_pred_folder.text()
        predicted_segmentation_paths = self.loadImagesPathsInFolder(predicted_folder, [".jpg", ".png", ".bmp"])
        predicted_segmentation_paths = self.filterPredictedPathsForEvaluation(predicted_segmentation_paths, 'Binary')
        evaluation_paths = self.connectPaths(predicted_segmentation_paths, true_segmentation_paths)
        self.progressBar_evaluation.setValue(0)
        self.evaluation_results = []
        length = len(evaluation_paths)
        row = 0
        # ev = evaluation.Evaluation(evaluation_paths, algorithms)
        # for current_results in ev.getResults():
        for paths in evaluation_paths:
            paths_nested = []
            paths_nested.append(paths)
            ev = evaluation.Evaluation(paths_nested, algorithms)
            # Evaluation
            current_results = ev.getResults()
            # print("evaluation results: ", evaluation_results)
            # Displaying result
            current_results = current_results[0]
            pred_img = cv2.imread(current_results["predicted path"])
            true_img = cv2.imread(current_results["true path"])
            pred_img = cv2.resize(pred_img, (50, 50))
            true_img = cv2.resize(true_img, (50, 50))
            # OpenCV loads images as BGR; convert so Format_RGB888 displays correct colors
            pred_img = cv2.cvtColor(pred_img, cv2.COLOR_BGR2RGB)
            true_img = cv2.cvtColor(true_img, cv2.COLOR_BGR2RGB)
            pred_img = QtGui.QImage(pred_img.data, pred_img.shape[1], pred_img.shape[0], 3 * pred_img.shape[1],
                                    QtGui.QImage.Format_RGB888)
            true_img = QtGui.QImage(true_img.data, true_img.shape[1], true_img.shape[0], 3 * true_img.shape[1],
                                    QtGui.QImage.Format_RGB888)
            pred_icon = QtGui.QIcon()
            true_icon = QtGui.QIcon()
            pred_icon.addPixmap(QtGui.QPixmap.fromImage(pred_img), QtGui.QIcon.Normal, QtGui.QIcon.Off)
            true_icon.addPixmap(QtGui.QPixmap.fromImage(true_img), QtGui.QIcon.Normal, QtGui.QIcon.Off)
            # rowPosition = self.table_ev_results.rowCount()
            self.table_ev_results.setRowCount(length)
            item = QTableWidgetItem(self.getShortFilePath(current_results["predicted path"]))
            item.setIcon(pred_icon)
            self.table_ev_results.setItem(row, 0, item)
            item = QTableWidgetItem(self.getShortFilePath(current_results["true path"]))
            item.setIcon(true_icon)
            self.table_ev_results.setItem(row, 1, item)
            self.table_ev_results.setItem(row, 2, QTableWidgetItem(str(round(current_results["jaccarda index weighted"],
                                                                             4))))
            self.table_ev_results.setItem(row, 3, QTableWidgetItem(str(round(current_results["f1 score weighted"], 4))))
            self.table_ev_results.setItem(row, 4, QTableWidgetItem(str(round(current_results["explained variance score"]
                                                                             , 4))))
            self.table_ev_results.setItem(row, 5, QTableWidgetItem(str(round(current_results["mean squared error"],
                                                                             4))))
            self.table_ev_results.resizeRowsToContents()
            row += 1
            # item = QListWidgetItem(pred_icon, true_icon, evaluation_results[0]["predicted path"],
            # evaluation_results[0] ["jaccarda index weighted"], evaluation_results[0]["f1 score weighted"],
            # evaluation_results[0] ["explained variance score"], evaluation_results[0]["mean squared error"]) #
            # item.setIcon(icon) self.list_ev_results.addItem(item)

            self.evaluation_results.append(current_results)
            # Update progress bar
            self.completed += int(100 / length)
            self.progressBar_evaluation.setValue(self.completed)
        self.progressBar_evaluation.setValue(100)
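The four score columns filled in above match standard scikit-learn metrics. A sketch of how such values can be computed for one image pair, assuming the masks are flattened to 1-D label arrays (illustrative only, not the Evaluation class used here):

from sklearn.metrics import (explained_variance_score, f1_score,
                             jaccard_score, mean_squared_error)

def segmentation_scores(y_true, y_pred):
    return {
        "jaccarda index weighted": jaccard_score(y_true, y_pred, average="weighted"),
        "f1 score weighted": f1_score(y_true, y_pred, average="weighted"),
        "explained variance score": explained_variance_score(y_true, y_pred),
        "mean squared error": mean_squared_error(y_true, y_pred),
    }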
Example #5
from constants import *

representative_set_df = pd.read_pickle(os.path.join(DEFAULT_PICKLE_PATH, 'representative_set.pkl'))
subdir = '2021-03-31-08h-54m_batchsize_16_hg_4_loss_weighted_mse_aug_light_sigma4_learningrate_5.0e-03_opt_rmsProp_gt-4kp_activ_sigmoid_subset_0.50_wmse-1-5'

generator = data_generator.DataGenerator(
            df=representative_set_df,
            base_dir=DEFAULT_VAL_IMG_PATH,
            input_dim=INPUT_DIM,
            output_dim=OUTPUT_DIM,
            num_hg_blocks=1, # doesn't matter for evaluation b/c we take one stack for GT
            shuffle=False,
            batch_size=len(representative_set_df),
            online_fetch=False)

# %% Run visualization on epoch range and save images to disk

epochs_to_visualize = [27, 28] #range(34,45)
print("\n\nEval start:   {}\n".format(time.ctime()))
for epoch in epochs_to_visualize:
    evaluator = evaluation.Evaluation(  # 'evaluator' avoids shadowing the built-in eval
        model_sub_dir=subdir,
        epoch=epoch)
    X_batch, y_stacked = generator[0] # There is only one batch in the generator
    y_batch = y_stacked[0] # take first hourglass section
    m_batch = representative_set_df.to_dict('records') # TODO: eventually this will be passed from data generator as metadata
    evaluator.visualize_batch(X_batch, y_batch, m_batch)
print("\n\nEval end:   {}\n".format(time.ctime()))

Example #6
import torch
from torch.utils.data import DataLoader

BATCH_SIZE = 8
NUM_WORKERS = 0
NUM_VAL = 3
USE_GPU = True

if __name__ == '__main__':

    transformed_data = data_loader.CityScape(train=False, rand=-1)

    dataloaders = DataLoader(transformed_data,
                             batch_size=BATCH_SIZE,
                             shuffle=False,
                             num_workers=NUM_WORKERS)

    model = torch.load('./bak/model.pkl')
    evaluator_val = evaluation.Evaluation(34)
    images_so_far = 0
    model.eval()
    with torch.no_grad():
        for batches in dataloaders:
            if USE_GPU:
                inputs = batches['image'].cuda()
                labels = batches['label']
            else:
                inputs = batches['image']
                labels = batches['label']

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            result = preds.cpu()
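The excerpt ends before result is consumed. With correct = total = 0 initialized before the loop, a simple pixel-accuracy accumulation inside it could look like this (a sketch, independent of this project's Evaluation class):

            correct += (result == labels).sum().item()
            total += labels.numel()

    print('pixel accuracy: {:.4f}'.format(correct / float(total)))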
Example #7
# import agent                                       # in work
# import agent_first_visit_mc_estimating_v as agent  # in work
# import agent_tabular_td0_estimating_v as agent     # in work
import agent_sarsa_learning as agent

import environment
import evaluation
import logging

# logging.basicConfig(format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', level=logging.DEBUG)
logging.basicConfig(
    format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
    level=logging.WARNING)

env = environment.Environment()
agent = agent.Agent(env.get_action_space())
evaluation = evaluation.Evaluation(agent, env)  # only reads from the objects

count_episode_done = 0
for i_episode in range(10000):
    state, reward = env.reset('random')  # random or fixed
    agent.reset()

    for t in range(60):  # caution: a very critical parameter -> roughly 2 * the longest path
        action = agent.step(state, reward)
        state, reward, done, info = env.step(action)

        if done:
            agent.done(state, reward)
            count_episode_done += 1  # assumed continuation: count the episode and end the loop
            break
Example #8
    if len(sys.argv) != 6:
        sys.exit(
            "Usage: python main.py filedir golddir similarity ngramsLen method..."
        )

    u = utterances.Utterances(sys.argv[1], sys.argv[2])
    v = variations.Variations(u._utterances, sys.argv[3], sys.argv[4],
                              sys.argv[5])
    varset = v.mark_variation_sets()

    utterances_in_varset = 0
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)
    for number, el in enumerate(varset):
        a, b, c = zip(*(el))
        utterances_in_varset += len(c)
        print "\n", number
        for ut in el:
            print ut[0], ut[1], ut[2]
    print "\nSTATS of MARKED VARIATION SETS in Files from: ", sys.argv[1]
    print "\nnumber of utterances: ", len(u._utterances)
    print "number of variation sets: ", len(varset)
    print "number of utterances in variation sets: ", utterances_in_varset
    gold_utterances = u._goldutterances
    gu_length = 0
    # get the total number of utterances in the gold variation sets
    for gold_set in gold_utterances:  # renamed so the Utterances object u is not shadowed
        gu_length += len(gold_set)
    print "\n\nnumber of gold utterances: ", gu_length

    e = evaluation.Evaluation(varset, gold_utterances)
Example #9
def run_epoch(data, is_training, model, optimizer, args):
    '''
    Train the model for one pass over the train data, and return loss and accuracy
    '''
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              drop_last=True)

    losses = []

    if is_training:
        model.train()
    else:
        model.eval()

    for batch in tqdm(data_loader):

        cosine_similarity = nn.CosineSimilarity(dim=0, eps=1e-6)
        criterion = nn.MultiMarginLoss(margin=0.4)
        #pdb.set_trace()

        if is_training:
            optimizer.zero_grad()

        # out: batch of samples, where every sample is a 2-D tensor of averaged hidden states
        bodies, bodies_masks = autograd.Variable(
            batch['bodies']), autograd.Variable(batch['bodies_masks'])

        if args.cuda:
            bodies, bodies_masks = bodies.cuda(), bodies_masks.cuda()

        out_bodies = model(bodies, bodies_masks)

        titles, titles_masks = autograd.Variable(
            batch['titles']), autograd.Variable(batch['titles_masks'])

        if args.cuda:
            titles, titles_masks = titles.cuda(), titles_masks.cuda()

        out_titles = model(titles, titles_masks)

        hidden_rep = (out_bodies + out_titles) / 2

        # calculate cosine similarities here and construct X_scores
        # expected data structure of hidden_rep = batch_size x number_of_q x hidden_size

        cs_tensor = autograd.Variable(
            torch.FloatTensor(hidden_rep.size(0),
                              hidden_rep.size(1) - 1))

        if args.cuda:
            cs_tensor = cs_tensor.cuda()

        #calculate cosine similarity for every query vs. neg q pair

        for j in range(1, hidden_rep.size(1)):
            for i in range(hidden_rep.size(0)):
                cs_tensor[i,
                          j - 1] = cosine_similarity(hidden_rep[i, 0, ],
                                                     hidden_rep[i, j, ])
                #cs_tensor[i, j-1] = cosine_similarity(hidden_rep[i, 0, ].type(torch.FloatTensor), hidden_rep[i, j, ].type(torch.FloatTensor))

        X_scores = torch.stack(cs_tensor, 0)
        y_targets = autograd.Variable(
            torch.zeros(hidden_rep.size(0)).type(torch.LongTensor))

        if args.cuda:
            y_targets = y_targets.cuda()

        if is_training:
            loss = criterion(X_scores, y_targets)
            print "Loss in batch", loss.data

            loss.backward()
            optimizer.step()

            losses.append(loss.cpu().data[0])

        else:
            #Average Precision = (sum_{i in j} P@i / j)  where j is the last index

            for i in range(args.batch_size):
                updateScores(args, cs_tensor, batch['similar'][i], i,
                             all_samples)

    # Calculate epoch level scores
    if is_training:
        avg_loss = np.mean(losses)
        print('Average Train loss: {:.6f}'.format(avg_loss))
        print()
    else:
        evalobj = evaluation.Evaluation(all_samples)
        print "MAP:", evalobj.MAP()
        print "MRR:", evalobj.MRR()
        print "P@5:", evalobj.Precision(5)
        print "P@1:", evalobj.Precision(1)
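MAP, MRR and P@k as printed above are standard ranking metrics. A compact sketch over lists of binary relevance labels already sorted by model score (illustrative, not this project's Evaluation class):

def mrr(ranked_lists):
    # mean reciprocal rank of the first relevant item per query
    return sum(1.0 / (labels.index(1) + 1)
               for labels in ranked_lists if 1 in labels) / len(ranked_lists)

def precision_at_k(ranked_lists, k):
    return sum(sum(labels[:k]) / float(k) for labels in ranked_lists) / len(ranked_lists)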
Example #10
def ComputeSimilarity(path, vectorizer, questions_dict, CROSS_DOMAIN):

	'''
	Expects a list of paths (containing a single element if validation data is from the same domain, two elements if the data is from another domain)
	'''

	all_q_scores = []

	questions_numbers = questions_dict.keys()

	dataset = createSamplesDataset(path, CROSS_DOMAIN, questions_numbers)

	sum_av_prec = 0.0
	sum_ranks = 0.0
	num_samples = 0.0
	top_5 = 0.0
	top_1 = 0.0
	auc_met = meter.AUCMeter()

	sorted_results = []

	for q in dataset:

		pos = dataset[q][0]
		rest_q = dataset[q][1]

		try:
			query_vector = vectorizer.transform([questions_dict[q]]).toarray()
		except:
			continue

		vector_label_list = createVectorLabelTuples(rest_q, questions_dict, q, pos, vectorizer)

		all_q_feature_vectors = [x[0] for x in vector_label_list]

		cs = cosine_similarity(all_q_feature_vectors, query_vector)

		cs_label_pair = []


		for index, question in enumerate(vector_label_list):
			cs_label_pair.append((cs[index], question[1]))


		if not CROSS_DOMAIN:
			scores_list = sorted(cs_label_pair, reverse = True, key=itemgetter(0))
			labels_only = [x[1] for x in scores_list]
			sorted_results.append(labels_only)


			#sum_av_prec, sum_ranks, num_samples, top_5, top_1 = updateScores(scores_list, sum_av_prec, sum_ranks, num_samples, top_5, top_1)
		else:
			# add every (score, label) pair, not only the last pair left over from the loop above
			for score, label in cs_label_pair:
				auc_met.add(score, label)
			all_q_scores.extend(cs_label_pair)


	if not CROSS_DOMAIN:
		evalobj = evaluation.Evaluation(sorted_results)

		print "MAP:", evalobj.MAP()
		print "MRR:", evalobj.MRR()
		print "P@5:", evalobj.Precision(5)
		print "P@1:", evalobj.Precision(1)
	else:
		print 'AUC: {:.4f}'.format(auc_met.value(0.05))
Example #11
    def train_model(self):
        '''Main function for training model.'''
        # Initialize running values.
        global_step = 0
        len_dev = len(self.dev_loader)
        # Training loop.
        self.model.train()

        for epoch in range(self.num_epochs):
            running_loss = 0.0
            logging.info("Epoch number: {}".format(epoch))
            print("epoch ", epoch)
            self.model.train()
            for batch_idx, batch in enumerate(self.train_loader):
                print(batch_idx)
                text = {
                    key: val.to(self.device)
                    for key, val in batch['text'].items()
                }
                images_features = batch['images_features'].to(
                    self.device).squeeze(0)
                actions_target = batch['actions'].to(self.device).squeeze(0)

                actions_binary = batch['actions_binary'].to(
                    self.device).squeeze(0)
                actions_output = self.model(text, images_features,
                                            actions_binary)

                loss = self.loss(actions_output, actions_target)

                self.optimizer.zero_grad()
                loss.backward()
                running_loss += loss.item()  # .item() avoids retaining the autograd graph
                self.optimizer.step()

            print("loss:", running_loss)

            ##############
            self.model.eval()
            evaluator = evaluation.Evaluation(len_dev)
            with torch.no_grad():
                for batch_idx, batch in enumerate(self.dev_loader):
                    print(batch_idx)
                    text = {
                        key: val.to(self.device)
                        for key, val in batch['text'].items()
                    }
                    images_feature = batch['images_features'][:, 0].to(
                        self.device)
                    route_panoids = batch['route_panoids']
                    route_panoids = [x[0] for x in route_panoids]
                    first_panoid = route_panoids[0]

                    panormid_pred = []
                    action_list = []
                    panormid_pred.append(first_panoid)
                    curr_state = (first_panoid, 0)
                    action = -1
                    action_binary = torch.zeros(4)
                    while action != ACTION_DICT['stop']:
                        actions_output = self.model(
                            text, images_feature,
                            action_binary.unsqueeze(0).to(self.device))
                        topv, topk = actions_output.data.topk(2)
                        action = topk.squeeze().tolist()[0]
                        new_state = navigator._get_next_graph_state(
                            curr_state, ACTION_DICT_IDX[action])
                        if action == ACTION_DICT['forward']:
                            if new_state[0] == curr_state[
                                    0]:  # Couldn't move forward.
                                action = topk.squeeze().tolist()[1]
                                new_state = navigator._get_next_graph_state(
                                    curr_state, ACTION_DICT_IDX[action])
                            else:
                                panormid_pred.append(new_state[0])
                        curr_state = new_state
                        panoid = curr_state[0]
                        feature_path = path.abspath(
                            path.join(MAIN_DIR, 'features', panoid + '.pt'))
                        images_feature = torch.load(feature_path).to(
                            self.device)
                        action_list.append(action)
                        action_binary = torch.zeros(4)
                        action_binary[action] = 1
                        if len(action_list) > 36:
                            action = 3
                            print("!!!!!")
                    evaluator.evaluate_single_sample(panormid_pred,
                                                     route_panoids)

                evaluator.performance_info()

        print("END")
Example #12
    def simulatedSolution(self):
        nonDominated = list()
        Resultfile = open("result.txt", "a")
        Result = [
            [] for i in xrange(len(self.GeneticRules))
        ]  # depends on the number of dispatching rules (the GA is used)
        #Result=[ [] for i in xrange(14)]
        #Result[0].append(0)
        for dRulesID in range(len(self.GeneticRules)):
            #for dRulesID in range(14):
            #for h in range(10):
            self.__init__(self.problem, self.solution, self.visitindex,
                          self.GeneticRules, self.extraction)
            self.initialization()
            self.nextTime = self.currentTime
            z = min(self.nextEventsSet, key=lambda tup: tup[2]
                    )  # the job in the event set is assigned to a machine randomly
            lena = len(self.problem.jobs[z[0]].operations[z[1]].machineSet)
            a = np.random.randint(0, lena)

            assignedMachineId = self.problem.jobs[z[0]].operations[
                z[1]].machineSet[a].id
            self.assignment.append([z[0], z[1], assignedMachineId, 'r'])
            self.solution.machines[
                assignedMachineId].assigmentOperation.append(z[:2])
            self.solution.machines[assignedMachineId].mwlm.append(z[:2])
            self.solution.machines[
                assignedMachineId].mwlwm += self.problem.jobs[z[0]].operations[
                    z[1]].processingTimes[a]
            #print self.solution.jobs[z[0]].id,self.solution.jobs[z[0]].operations[z[1]].id,self.solution.jobs[z[0]].operations[z[1]].machineId
            self.update(self.assignment)
            for i in self.nextEventsSet:
                if i[2] == self.currentTime:
                    self.nextEventsSet.remove(i)
            while len(self.notFinishedOpSet) > 0:
                self.findNextTimeandEvents()
                self.currentTime = self.nextTime
                if len(self.releasedOpSet) > 0:
                    self.LeastWaitingTimeAssignment()
                lastStarted = self.updateMachineSet(self.lastAssigned,
                                                    self.machineEventSet,
                                                    dRulesID)
                self.lastAssigned = list()
                if len(lastStarted) > 0:
                    self.update(lastStarted)
                '''for i in self.nextEventsSet:
                    if i[2]==self.currentTime and i[3]=='r':
                        self.nextEventsSet.remove(i)
    '''
            #a=TS.tranformation(self.problem,self.solution)#used for draw gantt chart
            mal = EV.Evaluation(self.solution)

            Resultfile.write(str(dRulesID) + "\t")
            Resultfile.write(str(mal[0]) + "\t")
            Resultfile.write(str(mal[1][0]) + "\t")
            Resultfile.write(str(mal[1][1]) + "\n")

            Result[dRulesID] += [dRulesID, mal[0], mal[1][0],
                                 mal[1][1]]  # the GA is used
            af = 4
            #nonDominated.append([mal[0],mal[1][0],mal[1][1]])
        #print d,mal[0],mal[1][0],mal[1][1]
        #print nonDominated
        #nDominated=NDS.sorting(nonDominated)
        Resultfile.close()

        return Result  # the GA is used
Example #13
    def train_and_dev(self,
                      sess,
                      x,
                      y,
                      seq_len,
                      batch_size,
                      test_x,
                      test_y,
                      test_seq_len,
                      epoch_size,
                      data=None):
        learning_rate = 1e-3
        train_aux_xs = data.train_aux_xs
        train_aux_xs_len = data.train_aux_xs_len
        train_aux_len = data.train_aux_len

        test_aux_xs = data.dev_aux_xs
        test_aux_xs_len = data.dev_aux_xs_len
        test_aux_len = data.dev_aux_len

        best_acc = 0.0
        try:
            for epoch in range(epoch_size):
                total_cost = 0.0
                total_batch = 0
                total_acc_num = 0
                for batch_x, batch_y, batch_len, batch_xs, batch_xs_len, batch_aux_len in batch_iter(
                        x,
                        y,
                        seq_len,
                        batch_size,
                        train_aux_xs,
                        train_aux_xs_len,
                        train_aux_len,
                        shuffle=True):
                    total_batch += 1
                    _, cost_val, acc_cnt = sess.run(
                        [self.train_op, self.cost, self.acc_cnt],
                        feed_dict={
                            self.in_x: batch_x,
                            self.in_y: batch_y,
                            self.in_len: batch_len,
                            self.aux_xs: batch_xs,
                            self.aux_xs_len: batch_xs_len,
                            self.aux_len: batch_aux_len,
                            self.learning_rate: learning_rate,
                            self.dropout_rate: 0.0
                        })
                    total_acc_num += acc_cnt
                    total_cost += cost_val
                    if total_batch % 30 == 0:
                        print('batch_%d cost_val: %0.5f' %
                              (total_batch, cost_val))
                print('Epoch:', '%02d' % (epoch + 1), 'cost_avg =',
                      '%0.5f' % (total_cost / total_batch),
                      'acc: %0.5f' % (total_acc_num / (0.0 + len(x))))

                if epoch < 4 and epoch % 2 == 1:
                    learning_rate /= 10.
                    print('drop learning rate, Epoch:{} - {}'.format(
                        epoch + 1, learning_rate))

                ###
                total_acc_num_test = 0
                for batch_test_x, batch_test_y, batch_len, batch_xs, batch_xs_len, batch_aux_len in batch_iter(
                        test_x, test_y, test_seq_len, 200, test_aux_xs,
                        test_aux_xs_len, test_aux_len):
                    acc_cnt_test = sess.run(self.acc_cnt,
                                            feed_dict={
                                                self.in_x: batch_test_x,
                                                self.in_y: batch_test_y,
                                                self.in_len: batch_len,
                                                self.aux_xs: batch_xs,
                                                self.aux_xs_len: batch_xs_len,
                                                self.aux_len: batch_aux_len,
                                                self.dropout_rate: 0.0
                                            })
                    total_acc_num_test += acc_cnt_test
                cur_acc = total_acc_num_test / (0.0 + len(test_x))
                print('test acc: {:.5f}'.format(cur_acc))

                if best_acc < cur_acc:
                    best_acc = cur_acc
                    self.saver.save(sess,
                                    config.save_dir + '/rcnn_saver.ckpt',
                                    global_step=epoch + 1)
                    probs, preds = self.predict(sess, data, False)
                    self.outputs(preds)
                    evaluation.Evaluation(
                        config.dev_file,
                        config.dev_predict_file.format(self.params.model_name))
        except Exception as e:
            print(e)
Example #14
def main():

    args = decode_args(sys.argv)

    to_dos = [("DATA/Swedish_MINGLE_dataset/plain/1",
               "DATA/Swedish_MINGLE_dataset/GOLD/1"),
              ("DATA/Swedish_MINGLE_dataset/plain/2",
               "DATA/Swedish_MINGLE_dataset/GOLD/2"),
              ("DATA/Swedish_MINGLE_dataset/plain/3",
               "DATA/Swedish_MINGLE_dataset/GOLD/3"),
              ("DATA/Swedish_MINGLE_dataset/plain/4",
               "DATA/Swedish_MINGLE_dataset/GOLD/4"),
              ("DATA/Childes/GOLD/French/cleaned/plain",
               "DATA/Childes/GOLD/French/cleaned/annotated")]

    fuzzy_precisions, strict_precisions, fuzzy_recalls, strict_recalls,\
            fuzzy_f1s, strict_f1s = [], [], [], [], [], []

    for to_do in to_dos:
        print("Finding variation sets in " + to_do[0])
        u = utterances.Utterances(to_do[0], to_do[1])
        gold_utterances = u._goldutterances

        utterances_reformatted = []
        ids = []

        for utterance in u._utterances:
            new_utt = utterance[2].split()
            utterances_reformatted.append(new_utt)
            ids.append((utterance[0], utterance[1]))

        utt_iter = window(utterances_reformatted, args[1])
        id_iter = window(ids, args[1])
        ids = [i for i in id_iter]

        if args[0] == "anch":
            ids_and_matches = matches_anchor(utt_iter, args[2], args[3],
                                             args[4], False, ids)
        else:
            ids_and_matches = matches_incremental(utt_iter, args[2], args[3],
                                                  args[4], False, ids)

        combined = convert_varseta_format(ids_and_matches)

        varseta_eval = evaluation.Evaluation(combined, gold_utterances)

        fuzzy_precisions.append(varseta_eval.fuzzy_precision)
        strict_precisions.append(varseta_eval.strict_precision)
        fuzzy_recalls.append(varseta_eval.fuzzy_recall)
        strict_recalls.append(varseta_eval.strict_recall)
        fuzzy_f1s.append(varseta_eval.fuzzy_f1)
        strict_f1s.append(varseta_eval.strict_f1)

        print('\tFuzzy Precision: {:0.2f}'.format(
            varseta_eval.fuzzy_precision))
        print('\tFuzzy Recall: {:0.2f}'.format(varseta_eval.fuzzy_recall))
        print('\tFuzzy F1: {:0.2f}'.format(varseta_eval.fuzzy_f1))
        print('')
        print('\tStrict Precision: {:0.2f}'.format(
            varseta_eval.strict_precision))
        print('\tStrict Recall: {:0.2f}'.format(varseta_eval.strict_recall))
        print('\tStrict F1: {:0.2f}'.format(varseta_eval.strict_f1))
        print('\n')

    avg_fuzzy_precision = sum(fuzzy_precisions) / len(fuzzy_precisions)
    avg_fuzzy_recall = sum(fuzzy_recalls) / len(fuzzy_recalls)
    avg_fuzzy_f1 = sum(fuzzy_f1s) / len(fuzzy_f1s)
    avg_strict_precision = sum(strict_precisions) / len(strict_precisions)
    avg_strict_recall = sum(strict_recalls) / len(strict_recalls)
    avg_strict_f1 = sum(strict_f1s) / len(strict_f1s)

    print('Average Scores:')
    print('Average Fuzzy Precision: {:0.2f}'.format(avg_fuzzy_precision))
    print('Average Fuzzy Recall: {:0.2f}'.format(avg_fuzzy_recall))
    print('Average Fuzzy F1: {:0.2f}'.format(avg_fuzzy_f1))
    print('')
    print('Average Strict Precision: {:0.2f}'.format(avg_strict_precision))
    print('Average Strict Recall: {:0.2f}'.format(avg_strict_recall))
    print('Average Strict F1: {:0.2f}'.format(avg_strict_f1))
Example #15
    # valid information
    valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                              valid_dataset)
    similarity = tf.matmul(valid_embeddings,
                           normalized_embeddings,
                           transpose_b=True)

    # merge all summaries
    merged = tf.summary.merge_all()

    # create a saver
    saver = tf.train.Saver()

    # analogy evaluator
    evaluator = evaluation.Evaluation(normalized_embeddings, W2I, questions)

    # global init
    init = tf.global_variables_initializer()
"""
Training procedure
"""
num_steps = int(math.ceil(
    data.shape[0] / float(BATCH_SIZE))) * NB_EPOCHS  # num steps to train; float() keeps the Python 2 division exact
data_index = 0  # determine which batch to extract
average_loss = 0  # overall loss value
nth_epoch = 1
with tf.Session(graph=graph) as session:
    # open a writer to write summaries.
    writer = tf.summary.FileWriter(TMP_DIR, session.graph)
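The snippet stops right after the summary writer is created. A typical continuation of such a word2vec training loop, inside the same with-block, might look like the sketch below; generate_batch, train_inputs, train_labels, optimizer and loss are assumed to be defined in the surrounding script and are illustrative here:

    init.run()
    for step in range(num_steps):
        batch_inputs, batch_labels = generate_batch(BATCH_SIZE)  # assumed helper
        _, loss_val = session.run([optimizer, loss],
                                  feed_dict={train_inputs: batch_inputs,
                                             train_labels: batch_labels})
        average_loss += loss_val
        if (step + 1) % 1000 == 0:
            print('step {}: average loss {:.4f}'.format(step + 1, average_loss / 1000))
            average_loss = 0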
Example #16
def launcher(
        group_by,
        path_list='/sequoia/data2/gcheron/UCF101/detection',
        path_info='/sequoia/data2/gcheron/UCF101/detection/mytracksK5_600k',
        path_tracks='/sequoia/data2/gcheron/pytorch/diffrac_action_localization/UCF101/results/mytracksK5_600k/tracks/',
        prepend_name='UCF101_spatiotemporal_at_least_one_gb',
        feat_type='RGB+OPF',
        sp_iou_thresh=0.3,
        write_eval=False,
        n_iterations=10000,
        alpha=1e-4,
        n_actions=25,
        cache_dir='/sequoia/data2/jalayrac/nips2017weakpose/cache/',
        res_dir='/sequoia/data2/jalayrac/nips2017weakpose/',
        cstrs_name='at_least_one_per_instance_unit_time',
        exp_suffix='',
        rdm_seed=19,
        path_log_eval=None,
        save_pr_curves=False,
        eval_frequency=500,
        val_eval=False,
        video_eval_args=None,
        video_eval_only=False,
        calibrate=False,
        use_calibration=False,
        no_init=False,
        no_feat_init=False):

    np.random.seed(rdm_seed)

    bias_value = BIAS_VALUE
    assert bias_value == 100, 'This value is hard coded in evaluation'

    need_init = not (video_eval_only or no_init)
    need_feat_init = not (video_eval_only or no_feat_init)
    if calibrate:
        mode = 'val'
    else:
        mode = 'train'

    # Get the features and GT.
    if need_feat_init:
        feats_train, labels_train, list_vid = load_feats_and_gts(
            mode, path_list, prepend_name, feat_type, group_by, sp_iou_thresh,
            cache_dir, path_tracks, path_info, n_actions)

    validation_info = None
    if need_feat_init:
        if val_eval:
            feats_val, labels_val, _ = load_feats_and_gts(
                'test', path_list, prepend_name, feat_type, group_by,
                sp_iou_thresh, cache_dir, path_tracks, path_info, n_actions)
            validation_info = {'gts': labels_val, 'feats': feats_val}

    # Get the at least one constraints for the training set.
    print('Dealing with constraints for {} set'.format(mode))
    # For each video get the data.

    name_cstrs = 'cstrs_{}_{}_gb_{}_spth_{}_{}'.format(prepend_name,
                                                       cstrs_name, group_by,
                                                       sp_iou_thresh, mode)
    path_cstrs = os.path.join(cache_dir, name_cstrs + '.npy')

    fn_cstrs = 'get_{}_cstr_for_video'.format(cstrs_name)
    compute_cstrs = True
    if not need_init:
        compute_cstrs = False
    elif os.path.isfile(path_cstrs):
        cstrs_train = np.load(path_cstrs).tolist()
        compute_cstrs = False

    if compute_cstrs:
        cstrs = []
        for id_vid, video_name in enumerate(tqdm(list_vid)):
            present, constraint = data_handler.get_general_cstr_for_video(
                video_name,
                groupby=group_by,
                path_info=path_info,
                path_tracks=path_tracks,
                n_actions=n_actions,
                constr_fn_name=fn_cstrs)
            if present:
                cstrs.append(constraint)
            assert present, (
                '{} is missing: please provide a list where all videos have gt/features/labels...'
            ).format(video_name)

        print('Saving constraints...')
        np.save(path_cstrs, cstrs)
        cstrs_train = cstrs

    # Generate random init.
    print('Generating a random Y init...')
    asgn_train = []

    list_at_least_one_cstr = [
        'at_least_one_per_instance', 'at_least_one_per_instance_unit_time',
        'fully_supervised', 'spot_on',
        'at_least_one_per_temporal_point_unit_time',
        'at_least_one_per_instance_unit_time_with_keyframes',
        'at_least_one_per_temporal_point_unit_time_with_keyframes',
        'at_least_one_clip_level', 'at_least_one_shot_level'
    ]
    if not need_init:
        at_least_one_solver = None
    elif cstrs_name in list_at_least_one_cstr:
        at_least_one_solver = linear_solvers.AtLeastOneSolver()
    else:
        raise ValueError(
            'cstrs_name: {} is not a valid option.'.format(cstrs_name))

    name_cstrs_init = 'cstrs_init_{}_{}_gb_{}_spth_{}_{}'.format(
        prepend_name, cstrs_name, group_by, sp_iou_thresh, mode)

    path_cstrs_init = os.path.join(cache_dir, name_cstrs_init + '.pkl')

    if not need_init:
        pass
    elif os.path.isfile(path_cstrs_init):
        with open(path_cstrs_init, 'rb') as f_cstrs:
            cstrs_init_dict = pickle.load(f_cstrs)
        asgn_train = cstrs_init_dict['asgn_train']
    else:
        n_rdm_init = 5
        for y, cstr in tqdm(zip(labels_train, cstrs_train)):
            # Generate a random gradient
            asgn_init = np.zeros(y.shape)

            for _ in range(n_rdm_init):
                rand_grad = np.random.randn(y.shape[0], y.shape[1])
                asgn_init += 1.0 / n_rdm_init * at_least_one_solver.solve(
                    cstr, rand_grad)

            asgn_train.append(asgn_init)

        # Save to path.
        with open(path_cstrs_init, 'wb') as f_cstrs:
            pickle.dump({'asgn_train': asgn_train}, f_cstrs)

    # Create the experiment name.
    exp_name = '{}_{}_niter_{}_gb_{}_{}_spth_{}_lambda_{}_pratio_{}_{}_slack_{}_beta_{}_delta_{}{}'.format(
        prepend_name, cstrs_name, n_iterations, group_by, mode, sp_iou_thresh,
        alpha, -1, feat_type, False, -1.0, -1.0, exp_suffix)

    exp_name_val = re.sub('train_spth_', 'val_spth_', exp_name)

    # set out dirs/paths
    path_out = os.path.join(res_dir, exp_name)
    path_out_val = os.path.join(res_dir, exp_name_val)
    name_asgn = 'asgn.npy'
    path_asgn = os.path.join(path_out, name_asgn)

    # get a classifier
    name_w = 'w.pkl'
    path_w = os.path.join(path_out, name_w)

    call('mkdir -p {}'.format(path_out), shell=True)

    if video_eval_only:
        weights = None
    else:
        test_fun = None
        if os.path.isfile(path_w):
            with open(path_w) as f_w:
                weights = pickle.load(f_w)
        else:
            # Launch FW optim.
            path_log_eval = os.path.join(
                path_log_eval, exp_name) if path_log_eval is not None else None
            print('Launching the BCFW optim...')
            asgn_final, weights = bcfw_diffrac.solver(
                feats_train,
                asgn_train,
                cstrs_train,
                at_least_one_solver,
                gts=labels_train,
                alpha=alpha,
                verbose='normal',
                bias_value=bias_value,
                block_sampling='gap_sampling',
                n_iterations=n_iterations,
                objective_frequency=250,
                eval_frequency=eval_frequency,
                eval_function=eval_functions.MAP(
                    n_actions - 1,
                    path_save=path_log_eval,
                    save_pr_curves=save_pr_curves),
                validation_info=validation_info)
            # Save assignment.
            np.save(path_asgn, asgn_final)
            # save final classifier
            with open(path_w, 'wb') as f_w:
                pickle.dump(weights, f_w)

    if calibrate and not use_calibration:
        # need to test the validation videos to calibrate
        list_file = '{}/OF_vidlist_{}1.txt'.format(path_list,
                                                   'valtrainRate10_val')
    else:
        list_file = '{}/OF_vidlist_{}1.txt'.format(path_list, 'test')
    # Evaluate on test data.
    if video_eval_only:
        pass
    elif write_eval:
        print('Writing evaluation files in {}'.format(path_out))
        with open(list_file, 'r') as f_list:
            list_vid = f_list.readlines()
            list_vid = [x.strip().split(' ')[0] for x in list_vid]

        for id_vid, video_name in enumerate(tqdm(list_vid)):
            data_handler.write_eval_data(video_name,
                                         weights,
                                         bias_value=bias_value,
                                         path_root_out=os.path.join(
                                             path_out, 'tracks'),
                                         path_info=path_info,
                                         path_tracks=path_tracks,
                                         feat_type=feat_type,
                                         n_actions=n_actions,
                                         test_fun=test_fun)

    if write_eval or video_eval_only:
        if use_calibration:
            calib_path = os.path.join(path_out_val, 'calibration.pkl')
            print 'Load calibration from:\n{}'.format(calib_path)
            with open(calib_path) as f:
                calibration = pickle.load(f)
            loc_th = np.zeros((n_actions - 1, len(video_eval_args['iou'])))
            for i, iou in enumerate(video_eval_args['iou']):
                loc_th[:,
                       i] = calibration[iou]  # Set one th per action, per iou.
        else:
            loc_th = video_eval_args['loc_th']

        # clear eventual existing evaluation cache
        call('rm -rf  {}/evaluation_cache'.format(path_out), shell=True)

        # create instance to eval video mAP
        ev = evaluation.Evaluation(
            video_eval_args['datasetname'], [path_out],
            list_file,
            video_eval_args['iou'],
            smooth_window=25,
            loc_th=loc_th,
            track_class_agnostic=video_eval_args['track_class_agnostic'],
            force_no_regressor=True,
            nthreads=8,
            one_th_per_iou=use_calibration)
        return ev
Example #17
preprocessor = preprocessing.PreProcessing()

X_train, x_test, Y_train, y_test = preprocessor.preprocess(
    data=training_data,
    test_size=0.25,
    train_size=0.75,
    random_state=69,
    target_variable="Value"  # Possibilities: "Value", "Wage", "Release_Clause"
)

### MODEL BUILDING ###
modeller = model.Model()

gbr_model, rfr_model, dtr_model = modeller.train_models(X_train,
                                                        Y_train,
                                                        n_estimators=1000,
                                                        max_depth=5,
                                                        learning_rate=0.1,
                                                        random_state=69,
                                                        n_jobs=6)

### EVALUATION ###
evaluator = evaluation.Evaluation()

evaluator.evaluate_models(gbr_model=gbr_model,
                          rfr_model=rfr_model,
                          dtr_model=dtr_model,
                          x_test=x_test,
                          y_test=y_test,
                          save_plots=True)
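For reference, a sketch of the kind of comparison such an evaluator typically performs, assuming the three models are scikit-learn regressors (the actual evaluate_models may differ):

from sklearn.metrics import mean_absolute_error, r2_score

for name, m in [("GBR", gbr_model), ("RFR", rfr_model), ("DTR", dtr_model)]:
    pred = m.predict(x_test)
    print("{}: MAE={:.3f}, R2={:.3f}".format(
        name, mean_absolute_error(y_test, pred), r2_score(y_test, pred)))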
Example #18
    def noteSegmentationFeatureExtraction(self,
                                          pitchtrackNoteFolderPath,
                                          featureVecFolderPath,
                                          pitchtrackNotePredictFolderPath,
                                          recordingNames,
                                          segCoef=0.2,
                                          predict=False,
                                          evaluation=False):
        '''
        This process will    1) segment the pitch track into notes whose boundaries are given by pYIN
                             2) refine the segmentation by searching for the stable part
                             3) calculate features on the refined segments
        :param pitchtrackNoteFolderPath:
        :param featureVecFolderPath:
        :param recordingNames:
        :param predict:
        :return: refined segment boundaries, refined segment pitch contours
        '''

        ############################################## segmentation ########################################################
        # the two lines below would do the segmentation while calculating pYIN
        # ptSeg = pitchtrackSegByNotes.pitchtrackSegByNotes(samplingFreq, frameSize, hopSize)
        # ptSeg.doSegmentation(pitchtrack, fs.m_oMonoNoteOut)

        ptSeg = pitchtrackSegByNotes.pitchtrackSegByNotes()

        if evaluation:
            evalu1 = evalu.Evaluation()  #  evaluation object
            COnOffall, COnall, OBOnall, OBOffall, gt, st = 0, 0, 0, 0, 0, 0

        for rn in recordingNames:
            pitchtrack_filename = os.path.join(pitchtrackNoteFolderPath,
                                               rn + '_pitchtrack.csv')
            monoNoteOut_filename = os.path.join(pitchtrackNoteFolderPath,
                                                rn + '_monoNoteOut.csv')

            ptSeg.doSegmentationForPyinVamp(pitchtrack_filename,
                                            monoNoteOut_filename)

            if evaluation:
                coarseSegmentation_filename = os.path.join(
                    pitchtrackNoteFolderPath, rn + '_coarseSeg.txt')
                ptSeg.coarseSegmentation(
                    monoNoteOut_filename,
                    coarseSegmentation_filename)  #  groundtruth segmentation

            # ptSeg.pltNotePitchtrack(saveFig=True, figFolder='../jingjuSegPic/laosheng/train/male_13/pos_3_midinote/')

        ###################### calculate the polynomial fitting coefs and vibrato frequency ############################
        #  use the pitch track ptSeg.pitchtrackByNotes from the last step

            featureDict = {}  # feature vectors dictionary
            segmentsExport = {}  # refined segmentation boundaries
            refinedPitchcontours = {}  # refined pitch contours
            extremas = {}  # extremas
            vibrato = {}
            curvefittingDeg = 1
            jj = 1
            jjNone = []
            jjj = 1
            for ii in range(len(ptSeg.pitchtrackByNotes)):
                pt = ptSeg.pitchtrackByNotes[ii][0]
                pt = np.array(pt, dtype=np.float32)
                x, y = self.normalizeNotePt(
                    pt)  #  normalise x to [0,1], remove y DC
                sbp = self.localMinMax(
                    y)  #  local minima and extrema of pitch track
                self.diffExtrema(
                    x, y)  #  the amplitude difference of minima and extrema
                self.segmentPointDetection1(
                    segCoef)  #  do the extrema segmentation here

                #  nc1.segmentPointDetection2()  # segmentation point
                self.segmentRefinement(pt)  #  do the refined segmentation
                #print self.refinedNotePts

                for rpt in self.refinedNotePts:
                    # print jj

                    featureVec, extrema, vibOut = self.featureExtractionProcess(
                        rpt, sbp, curvefittingDeg)
                    if featureVec[0]:
                        refinedPitchcontours[jj] = rpt.tolist()
                        featureDict[jj] = featureVec.tolist()
                        extremas[jj] = extrema
                        vibrato[jj] = vibOut
                    else:
                        jjNone.append(jj)

                    #  this plot step is slow, if we only want the features, we can comment this line
                    #nc1.pltRefinedNotePtFc(xRpt, yRpt, p, rsquare, polyVar, vibFreq, saveFig=True,
                    #                        figFolder='../jingjuSegPic/laosheng/train/refinedSegmentCurvefit/'+rn+'_curvefit_refined/',
                    #                        figNumber = jj)
                    jj += 1

                if predict:
                    # construct the segments frame vector: frame boundary of segments
                    noteStartFrame = ptSeg.noteStartEndFrame[ii][0]
                    noteEndFrame = ptSeg.noteStartEndFrame[ii][1]

                    extremaInd = np.array(self.extrema)
                    segmentsInd = extremaInd[self.segments] + noteStartFrame
                    segmentsInd = np.insert(segmentsInd, 0, noteStartFrame)
                    segmentsInd = np.append(
                        segmentsInd,
                        noteEndFrame) + 2  # +2 for sonicVisualizer alignment
                    # segmentsExport[jjj] = str(segmentsInd)
                    for kk in range(len(segmentsInd) - 1):
                        if jjj not in jjNone:
                            segmentsExport[jjj] = [
                                segmentsInd[kk], segmentsInd[kk + 1]
                            ]  #  segmentation boundary
                        jjj += 1

            if evaluation:
                # evaluate segmentation
                COnOff, COn, OBOn, OBOff = \
                    evalu1.coarseEval(ptSeg.coarseSegmentationStartEndFrame,segmentsExport.values())
                COnOffall += COnOff
                COnall += COn
                OBOnall += OBOn
                OBOffall += OBOff
                gt += len(ptSeg.coarseSegmentationStartEndFrame)
                st += len(segmentsExport.values())

            # write feature into json
            featureFilename = os.path.join(featureVecFolderPath,
                                           '%s.json' % rn)
            with open(featureFilename, 'w') as outfile:
                json.dump(featureDict, outfile)

            if predict:
                # output segments boundary frames pitch contours
                outJsonDict = {
                    'refinedPitchcontours': refinedPitchcontours,
                    'boundary': segmentsExport,
                    'extremas': extremas,
                    'vibrato': vibrato
                }
                refined_segment_filename = os.path.join(
                    pitchtrackNotePredictFolderPath,
                    '%s_refinedSegmentFeatures.json' % rn)
                with open(refined_segment_filename, "w") as outfile:
                    json.dump(outJsonDict, outfile)
                    # for se in segmentsExport:
                    #     # outfile.write(str(int(se[0]))+'\t'+str(se[1])+'\n')
                    #     outfile.write(str(se)+'\n')

        if evaluation:
            # print COnOffall,COnall,OBOnall,OBOffall,gt,st
            COnOffF, COnF, OBOnRateGT, OBOffRateGT = evalu1.metrics(
                COnOffall, COnall, OBOnall, OBOffall, gt, st)
            return COnOffF, COnF, OBOnRateGT, OBOffRateGT
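The returned COnOffF and COnF values are F-measures over the correct onset/offset counts accumulated above. For reference, the standard F-measure combination of such counts (a sketch of the usual formula, not necessarily the exact metrics() implementation):

def f_measure(n_correct, n_ground_truth, n_detected):
    precision = n_correct / float(n_detected)
    recall = n_correct / float(n_ground_truth)
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)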