def train_base_model(self, data, latent_size=20, iterations=100, lamda=0.05, gamma=0.05, verbose=True):
    '''
    training a SVD model - this is the main function of this class.
    Runs stochastic gradient descent over the (user, item, rank) rows. The data is split 70/30 into a train
    part and an internal validation part; after every pass over the train part the validation RMSE is computed
    and training stops at the first pass that does not improve on self.rmse (or after 'iterations' passes).

    :param data: the data-frame to learn from. Should hold 3 columns - UserID, ItemID and Rank
    :param latent_size: number of latent features to create. Default=20. Must be positive integer
    :param iterations: maximum number of iterations to run. The algo can stop before
            (if the RMSE rises between iterations). Default=100. Must be positive integer
    :param lamda: the step size (learning rate) every gradient update is multiplied by. Default=0.05
    :param gamma: the weight of the regularization term inside every update. Default=0.05
    :param verbose: Boolean, whether to print the summary line when the training stops early
    :return: nothing, all stored in the class object (_users, _items, _users_avg, _items_avg, _user_item_avg,
            _average, _full_pred_matrix, _b_u, _b_i and rmse)
    '''
    start_time = datetime.now()
    # splitting to train and validation (validation is used as an internal dataset to avoid overfitting)
    train_data, validation_data = train_test_split(data, test_size=0.3, random_state=42)

    # calculating important values and saving them in the object
    # (users/items come from the train part only, the averages from the full data)
    self._users = set(train_data["UserID"])
    self._items = set(train_data["ItemID"])
    self._users_avg = data.groupby("UserID")["Rank"].mean().to_dict()
    self._items_avg = data.groupby("ItemID")["Rank"].mean().to_dict()
    self._user_item_avg = np.average(data["Rank"])

    # setting random values to the models parameters
    b_u = np.random.uniform(-0.1, 0.1, size=len(self._users))
    b_i = np.random.uniform(-0.1, 0.1, size=len(self._items))
    p_u = np.random.uniform(-0.1, 0.1, size=(len(self._users), latent_size))
    q_i = np.random.uniform(-0.1, 0.1, size=(len(self._items), latent_size))
    avg = np.average(train_data["Rank"])
    self._average = avg

    # converting some to data-frames (easier to save and handle later)
    users_index = list(self._users)
    items_index = list(self._items)
    p_u_df = pd.DataFrame(p_u, index=users_index, columns=range(0, latent_size))
    q_i_df = pd.DataFrame(q_i, index=items_index, columns=range(0, latent_size))
    b_u_df = pd.DataFrame(b_u, index=users_index)
    b_i_df = pd.DataFrame(b_i, index=items_index)

    # building the evaluation object to be used inside the loop
    eval_obj = evaluation.Evaluation()

    # starting the gradient descent phase, the loop can stop before we reach 'iterations' number of cycles
    for i in range(iterations):
        # looping over each row in the dataset (user/item/rank) and updating the params
        for index, row in train_data.iterrows():
            cur_user = row["UserID"]
            cur_item = row["ItemID"]
            cur_rank = row["Rank"]
            dot_prod = sum(p_u_df.loc[cur_user] * q_i_df.loc[cur_item])
            cur_error = float(cur_rank - avg - b_u_df.loc[cur_user] - b_i_df.loc[cur_item] - dot_prod)
            b_u_df.loc[cur_user] += lamda * (cur_error - gamma * b_u_df.loc[cur_user])
            b_i_df.loc[cur_item] += lamda * (cur_error - gamma * b_i_df.loc[cur_item])
            # both latent vectors must be updated from the values they had *before* this step, so the item
            # vector is copied first; otherwise the user update would see the already-moved item vector
            old_q = q_i_df.loc[cur_item].copy()
            q_i_df.loc[cur_item] += lamda * (cur_error * p_u_df.loc[cur_user] - gamma * old_q)
            p_u_df.loc[cur_user] += lamda * (cur_error * old_q - gamma * p_u_df.loc[cur_user])

        # end of inner loop, now calculating the errors and decide if to go for another loop
        self._full_pred_matrix = p_u_df.dot(q_i_df.transpose())
        self._b_i = b_i_df
        self._b_u = b_u_df
        validation_pred = self.predict(new_data=validation_data)
        eval_obj.evaluate(true_ranks=list(validation_data["Rank"]), predicted_ranks=validation_pred)

        # case the RMSE was improved, we'll save it and go for another loop
        # NOTE(review): self.rmse is read here before this method ever assigns it, so it is presumably
        # initialised (to a large value) by the class constructor - confirm
        if eval_obj.rmse < self.rmse:
            self.rmse = eval_obj.rmse
        # case we need to finish the algorithm, RMSE wasn't improved
        else:
            train_pred = self.predict(new_data=train_data)
            eval_obj.evaluate(true_ranks=list(train_data["Rank"]), predicted_ranks=train_pred)
            duration = (datetime.now() - start_time).seconds
            if verbose:
                print("SVD model finished, took us {} loops and {} minutes."
                      " RMSE measure in the train dataset is {}"
                      " MAE measure in the train dataset is {}".format(i, duration/60.0,
                                                                        eval_obj.rmse, eval_obj.mae))
            break
def main():
    """
    Run one recommender experiment end to end: validate the configuration, load the Yelp data of the chosen
    category, train the chosen model (svd / cb / hybrid), predict the test set and print RMSE and MAE.

    The configuration is hard coded in "region 1" below and must be edited by the user.
    :return: nothing, results are printed
    :raises IOError: if the configured category or model is not supported, or if a non-SVD model is requested
            together with the 'all' category
    """
    # local import: only this function needs it and the file's import block is not touched
    import os

    valid_categories = ['active', 'homeservices', 'hotelstravel', 'nightlife', 'pets', 'restaurants',
                        'shopping', 'all']
    valid_models = ['svd', 'cb', 'hybrid']

    # region 1. Configurations (USER MUST DEFINE THESE)
    # must be out of the following: active, homeservices, hotelstravel, nightlife, pets, restaurants, shopping, all
    cur_category = 'pets'
    data_path = "C:\\Users\\abrahami\\Documents\\Private\\Uni\\BGU\\Reco.Systems - Bracha\\HW1\\Yelp"
    model = 'svd'   # should be one out of svd/cb/hybrid

    # input validity check
    if cur_category not in valid_categories:
        raise IOError("category provided is invalid")
    # without this check an unknown model skipped every branch below and crashed later with a NameError
    if model not in valid_models:
        raise IOError("model provided is invalid, should be one out of svd/cb/hybrid")
    if cur_category == 'all' and model != 'svd':
        raise IOError(
            "modeling based on all categories is possible only with SVD model. In order to model with CB "
            "or hybrid all categories, you need to loop over all possible categories and combine results"
        )
    # endregion

    # region 2. Data load
    loader_obj = loader.Loader(path=data_path)
    if cur_category != 'all':
        data = loader_obj.load(category=cur_category, d_train_all_categories=False)
    else:
        data = loader_obj.load(d_train_all_categories=True)
    train = data['train_data']
    test = data['test_data']
    print("Loading has just finished, shape of the train dataset is {},"
          " shape of the test dataset is {}.\n"
          "There are {} distinct users and {} distinct items in the training dataset."
          " There are {} distinct users and {} distinct items in the test dataset\n".
          format(train.shape, test.shape,
                 len(train["UserID"].unique()), len(train["ItemID"].unique()),
                 len(test["UserID"].unique()), len(test["ItemID"].unique())))
    # endregion

    # region 3. Modeling
    if model == 'svd':
        svd_obj = svd.SVDRecommender()
        svd_obj.train_base_model(data=train, latent_size=5, iterations=10, gamma=0.01, lamda=0.01)
        test_prediction = svd_obj.predict(new_data=test)
    elif model == 'cb':
        cb_obj = content_based.ContentBasedRecommender(gamma=0.5)
        items_path = os.path.join(data_path, cur_category, "items.txt")
        cb_obj.train_tf_idf(path=items_path)
        test_prediction = cb_obj.predict(new_data=test, train_data=train)
    else:
        # model == 'hybrid' (guaranteed by the validity check above)
        hybrid_obj = hybrid.HybridModeling()
        items_path = os.path.join(data_path, cur_category, "items.txt")
        hybrid_obj.train_hybrid(train_data=train, items_path=items_path)
        test_prediction = hybrid_obj.predict(new_data=test, train_data=train)
    # endregion

    # region 4. Evaluation
    eval_obj = evaluation.Evaluation(smart_rounding=True)
    eval_obj.evaluate(true_ranks=list(test["Rank"]), predicted_ranks=test_prediction)
    print("\n{} model results: the RMSE of the test data-set is {}, the MAE is {}".
          format(model, eval_obj.rmse, eval_obj.mae))
    # endregion
# Make the project modules that live in the local 'class' directory importable.
sys.path.insert(0,'class')
import model
import pathconfig
import preprocessing_images
import evaluation
''' Script py to evaluate the network Transfer Learning '''
paths = pathconfig.paths()
Model = model.Model()
Evaluation = evaluation.Evaluation()
model_Transfer_Learning= model.Transfer_Learning() # Transfer Learning model
# The dataset chosen for evaluation. To evaluate on another dataset, just change this
# path (e.g. to paths.PATH_IMAGES_TEST).
path_test_set = paths.PATH_IMAGES_BLIND_TEST
path_model = model_Transfer_Learning.path_model
path_weights = model_Transfer_Learning.path_weights
# NOTE: from here on the name `model` refers to the loaded network, no longer to the imported `model` module.
model = Model.load_model(path_model,path_weights)
Preprocessing_Images = preprocessing_images.Preprocessing_Images()
test_datagen, val_steps, classnames = Preprocessing_Images.get_set_from_path(setdata=path_test_set)
predictions = model.predict_generator(test_datagen,verbose=1,steps=val_steps)
Ypred = np.argmax(predictions, axis=1) # predicted label: index of the highest score in each prediction row
Ytest = test_datagen.classes # labels of the test set, as exposed by the generator
def evaluation(self):
    """
    Evaluate every (predicted, ground-truth) segmentation pair and show the scores in the results table.

    Reads the metric check-boxes and the two folder fields from the GUI, pairs the predicted images with the
    ground-truth images, runs evaluation.Evaluation on each pair separately and fills one row of
    table_ev_results per pair (thumbnails, short paths and the four scores rounded to 4 digits).
    The per-pair result dicts are collected in self.evaluation_results and the progress bar is updated
    after every pair.
    """
    thumbnail_size = (50, 50)

    def make_icon(image_path):
        # Build a QIcon holding a thumbnail of the image; an unreadable image yields an empty icon.
        image = cv2.imread(image_path)
        icon = QtGui.QIcon()
        if image is None:
            # cv2.imread returns None instead of raising when the file cannot be read
            return icon
        image = cv2.resize(image, thumbnail_size)
        # OpenCV loads pixels as BGR while Format_RGB888 expects RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        q_image = QtGui.QImage(image.data, image.shape[1], image.shape[0], 3 * image.shape[1],
                               QtGui.QImage.Format_RGB888)
        icon.addPixmap(QtGui.QPixmap.fromImage(q_image), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        return icon

    self.table_ev_results.setRowCount(0)
    self.completed = 0

    # one 0/1 flag per metric: [jaccard, f1 score, explained variance score, mean squared error]
    algorithms = [
        int(self.check_ev_alg_jaccarda.isChecked()),
        int(self.check_ev_alg_f1_score.isChecked()),
        int(self.check_ev_alg_evs.isChecked()),
        int(self.check_ev_alg_mse.isChecked()),
    ]

    ground_true_folder = self.line_ev_true_folder.text()
    true_segmentation_paths = self.loadImagesPathsInFolder(ground_true_folder, [".bmp"])
    predicted_folder = self.line_ev_pred_folder.text()
    predicted_segmentation_paths = self.loadImagesPathsInFolder(predicted_folder, [".jpg", ".png", ".bmp"])
    predicted_segmentation_paths = self.filterPredictedPathsForEvaluation(predicted_segmentation_paths,
                                                                          'Binary')
    evaluation_paths = self.connectPaths(predicted_segmentation_paths, true_segmentation_paths)

    self.progressBar_evaluation.setValue(0)
    self.evaluation_results = []
    length = len(evaluation_paths)
    # the table size does not change inside the loop, so it is set once
    self.table_ev_results.setRowCount(length)

    # (column, result key) of the four score columns
    score_columns = [
        (2, "jaccarda index weighted"),
        (3, "f1 score weighted"),
        (4, "explained variance score"),
        (5, "mean squared error"),
    ]

    for row, paths in enumerate(evaluation_paths):
        # each pair is evaluated on its own so the table and the progress bar can follow along
        ev = evaluation.Evaluation([paths], algorithms)
        current_results = ev.getResults()[0]

        # Displaying result
        item = QTableWidgetItem(self.getShortFilePath(current_results["predicted path"]))
        item.setIcon(make_icon(current_results["predicted path"]))
        self.table_ev_results.setItem(row, 0, item)

        item = QTableWidgetItem(self.getShortFilePath(current_results["true path"]))
        item.setIcon(make_icon(current_results["true path"]))
        self.table_ev_results.setItem(row, 1, item)

        for column, key in score_columns:
            self.table_ev_results.setItem(row, column,
                                          QTableWidgetItem(str(round(current_results[key], 4))))
        self.table_ev_results.resizeRowsToContents()

        # NOTE(review): the legacy list-widget population (list_ev_results.addItem) that appears next to
        # commented-out code in the original is left disabled here - confirm it is not needed
        self.evaluation_results.append(current_results)

        # Update progress bar. The percentage is derived from the number of finished pairs; adding
        # int(100 / length) per pair would add 0 every time once there are more than 100 pairs.
        self.completed = int(100 * (row + 1) / length)
        self.progressBar_evaluation.setValue(self.completed)

    self.progressBar_evaluation.setValue(100)
from constants import *

# Load the pickled "representative set" data-frame that is visualized below.
representative_set_df = pd.read_pickle(os.path.join(DEFAULT_PICKLE_PATH, 'representative_set.pkl'))
# Sub-directory name of the trained model whose epochs are visualized.
subdir = '2021-03-31-08h-54m_batchsize_16_hg_4_loss_weighted_mse_aug_light_sigma4_learningrate_5.0e-03_opt_rmsProp_gt-4kp_activ_sigmoid_subset_0.50_wmse-1-5'
# One generator over the whole representative set: batch_size equals the number of rows and shuffling is off.
generator = data_generator.DataGenerator(
    df=representative_set_df,
    base_dir=DEFAULT_VAL_IMG_PATH,
    input_dim=INPUT_DIM,
    output_dim=OUTPUT_DIM,
    num_hg_blocks=1, # doesn't matter for evaluation b/c we take one stack for GT
    shuffle=False,
    batch_size=len(representative_set_df),
    online_fetch=False)
# %% Run visualization on epoch range and save images to disk
epochs_to_visualize = [27, 28] #range(34,45)
print("\n\nEval start: {}\n".format(time.ctime()))
for epoch in epochs_to_visualize:
    # NOTE(review): `eval` shadows the Python builtin of the same name for the rest of the module.
    eval = evaluation.Evaluation(
        model_sub_dir=subdir,
        epoch=epoch)
    X_batch, y_stacked = generator[0] # There is only one batch in the generator
    y_batch = y_stacked[0] # take first hourglass section
    m_batch = representative_set_df.to_dict('records') # TODO: eventually this will be passed from data generator as metadata
    eval.visualize_batch(X_batch, y_batch, m_batch)
print("\n\nEval end: {}\n".format(time.ctime()))
# %%
# Run configuration of the evaluation script.
BATCH_SIZE = 8
NUM_WORKERS = 0
NUM_VAL = 3
USE_GPU = True  # when True the input images are moved to the GPU before the forward pass

if __name__ == '__main__':
    # Dataset built with train=False; batches are read in order (shuffle=False).
    transformed_data = data_loader.CityScape(train=False, rand=-1)
    dataloaders = DataLoader(transformed_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
    # NOTE: torch.load unpickles the whole model object - only load files from a trusted source.
    model = torch.load('./bak/model.pkl')
    # NOTE(review): 34 is presumably the number of label classes - confirm against evaluation.Evaluation.
    evaluator_val = evaluation.Evaluation(34)
    images_so_far = 0
    model.eval()
    # Inference only: no gradients are tracked inside this block.
    with torch.no_grad():
        for batches in dataloaders:
            # Only the images are moved to the GPU; the labels are used as loaded in both branches.
            if USE_GPU:
                inputs = batches['image'].cuda()
                labels = batches['label']
            else:
                inputs = batches['image']
                labels = batches['label']
            outputs = model(inputs)
            # Index of the maximum along dimension 1 of the model output is taken as the prediction.
            _, preds = torch.max(outputs, 1)
            result = preds.cpu()
# import agent # in work # import agent_first_visit_mc_estimating_v as agent # in work # import agent_tabular_td0_estimating_v as agent # in work import agent_sarsa_learning as agent import evaluation import logging # logging.basicConfig(format='%(lstateinfosevelname)-8s [%(filename)s:%(lineno)d] %(message)s', level=logging.DEBUG) logging.basicConfig( format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', level=logging.WARNING) env = environment.Environment() agent = agent.Agent(env.get_action_space()) evaluation = evaluation.Evaluation(agent, env) # liest nur aus den Objekten count_episode_done = 0 for i_episode in range(10000): state, reward = env.reset('random') # random or fix agent.reset() for t in range( 60 ): #60): # Achtung: cleasehr kritischer Parameter -> ca. 2 * laengster Weg action = agent.step(state, reward) state, reward, done, info = env.step(action) if done: agent.done(state, reward)
# Command line entry (Python 2): expects exactly five arguments after the script name.
if len(sys.argv) != 6:
    sys.exit(
        "Usage: python main.py filedir golddir similarity ngramsLen method..."
    )
# argv[1] / argv[2] are the file and gold directories; argv[3..5] are passed on to Variations unchanged.
u = utterances.Utterances(sys.argv[1], sys.argv[2])
v = variations.Variations(u._utterances, sys.argv[3], sys.argv[4], sys.argv[5])
varset = v.mark_variation_sets()
utterances_in_varset = 0
# Wrap stdout so that unicode text is written UTF-8 encoded.
sys.stdout = codecs.getwriter('utf8')(sys.stdout)
# Print every variation set (its number, then one line per utterance) and count the utterances.
for number, el in enumerate(varset):
    # Transpose the 3-tuples of the set; only the length of the third column is used.
    # NOTE(review): this unpacking raises ValueError for an empty set - presumably sets are never empty.
    a, b, c = zip(*(el))
    utterances_in_varset += len(c)
    print "\n", number
    for ut in el:
        print ut[0], ut[1], ut[2]
print "\nSTATS of MARKED VARIATION SETS in Files from: ", sys.argv[1]
print "\nnumber of utterances: ", len(u._utterances)
print "number of variation sets: ", len(varset)
print "number of utterances in variation sets: ", utterances_in_varset
gold_utterances = u._goldutterances
gu_length = 0 # get the total amount of utterances in variation sets
# NOTE(review): the loop variable rebinds `u`; after this loop `u` no longer refers to the Utterances
# object created above (unless gold_utterances is empty).
for u in gold_utterances:
    gu_length += len(u)
print "\n\nnumber of gold utterances: ", gu_length
e = evaluation.Evaluation(varset, gold_utterances)
def run_epoch(data, is_training, model, optimizer, args):
    '''
    Run the model for one pass over data.

    For every batch the bodies and titles are encoded with the model and averaged; the cosine similarity
    between the first question of each sample (index 0) and each of the remaining questions is stored
    in a (batch size) x (number of questions - 1) score tensor.
    When training, the multi-margin loss of these scores against target index 0 is back-propagated and the
    average batch loss is printed. Otherwise the scores are handed to updateScores() per sample and
    MAP, MRR, P@5 and P@1 are printed.

    Returns the average train loss when is_training is True, otherwise None.
    '''
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              drop_last=True)
    losses = []
    if is_training:
        model.train()
    else:
        model.eval()

    # Neither module depends on the batch, so both are built once instead of once per batch.
    cosine_similarity = nn.CosineSimilarity(dim=0, eps=1e-6)
    criterion = nn.MultiMarginLoss(margin=0.4)

    for batch in tqdm(data_loader):
        if is_training:
            optimizer.zero_grad()

        #out - batch of samples, where every sample is 2d tensor of avg hidden states
        bodies, bodies_masks = autograd.Variable(batch['bodies']), autograd.Variable(batch['bodies_masks'])
        if args.cuda:
            bodies, bodies_masks = bodies.cuda(), bodies_masks.cuda()
        out_bodies = model(bodies, bodies_masks)

        titles, titles_masks = autograd.Variable(batch['titles']), autograd.Variable(batch['titles_masks'])
        if args.cuda:
            titles, titles_masks = titles.cuda(), titles_masks.cuda()
        out_titles = model(titles, titles_masks)

        hidden_rep = (out_bodies + out_titles) / 2

        #Calculate cosine similarities here and construct X_scores
        #expected datastructure of hidden_rep = batchsize x number_of_q x hidden_size
        cs_tensor = autograd.Variable(torch.FloatTensor(hidden_rep.size(0), hidden_rep.size(1) - 1))
        if args.cuda:
            cs_tensor = cs_tensor.cuda()

        #calculate cosine similarity for every query vs. neg q pair
        for j in range(1, hidden_rep.size(1)):
            for i in range(hidden_rep.size(0)):
                cs_tensor[i, j - 1] = cosine_similarity(hidden_rep[i, 0, ], hidden_rep[i, j, ])

        X_scores = torch.stack(cs_tensor, 0)
        # the target class of every sample is column 0 of the score tensor
        y_targets = autograd.Variable(torch.zeros(hidden_rep.size(0)).type(torch.LongTensor))
        if args.cuda:
            y_targets = y_targets.cuda()

        if is_training:
            loss = criterion(X_scores, y_targets)
            print("Loss in batch %s" % (loss.data,))
            loss.backward()
            optimizer.step()
            losses.append(loss.cpu().data[0])
        else:
            #Average Precision = (sum_{i in j} P@i / j) where j is the last index
            # NOTE(review): all_samples is not defined in this function, so it is presumably a
            # module-level container that updateScores() fills - confirm (and check that it is reset
            # between epochs)
            for i in range(args.batch_size):
                updateScores(args, cs_tensor, batch['similar'][i], i, all_samples)

    # Calculate epoch level scores
    if is_training:
        avg_loss = np.mean(losses)
        print('Average Train loss: {:.6f}'.format(avg_loss))
        # blank separator line; a bare print() would print "()" under Python 2
        print('')
        return avg_loss

    evalobj = evaluation.Evaluation(all_samples)
    print("MAP: %s" % (evalobj.MAP(),))
    print("MRR: %s" % (evalobj.MRR(),))
    print("P@5: %s" % (evalobj.Precision(5),))
    print("P@1: %s" % (evalobj.Precision(1),))
    return None
def ComputeSimilarity(path, vectorizer, questions_dict, CROSS_DOMAIN):
    '''
    Score every query question against its candidate questions with cosine similarity and print the
    ranking metrics.

    Expects a list of paths (containing a single element if validation data is from the same domain,
    two elements if the data is from another domain).

    For the same-domain case (CROSS_DOMAIN false) the candidate labels of every query are ordered by
    decreasing similarity and MAP, MRR, P@5 and P@1 are printed. For the cross-domain case every
    (similarity, label) pair is added to an AUC meter and AUC(0.05) is printed.
    Queries whose text cannot be vectorized are skipped. Nothing is returned.
    '''
    all_q_scores = []
    questions_numbers = questions_dict.keys()
    dataset = createSamplesDataset(path, CROSS_DOMAIN, questions_numbers)
    auc_met = meter.AUCMeter()
    sorted_results = []

    for q in dataset:
        pos = dataset[q][0]
        rest_q = dataset[q][1]
        try:
            query_vector = vectorizer.transform([questions_dict[q]]).toarray()
        except Exception:
            # best effort: a query that cannot be looked up or vectorized is left out of the metrics
            # (a bare "except:" would also swallow KeyboardInterrupt / SystemExit)
            continue

        vector_label_list = createVectorLabelTuples(rest_q, questions_dict, q, pos, vectorizer)
        all_q_feature_vectors = [x[0] for x in vector_label_list]
        cs = cosine_similarity(all_q_feature_vectors, query_vector)
        # one (similarity, label) pair per candidate question
        cs_label_pair = [(cs[index], question[1]) for index, question in enumerate(vector_label_list)]

        if not CROSS_DOMAIN:
            scores_list = sorted(cs_label_pair, reverse=True, key=itemgetter(0))
            sorted_results.append([x[1] for x in scores_list])
        else:
            # every candidate has to reach the meter, not only the last one of the query
            for score, label in cs_label_pair:
                auc_met.add(score, label)
        all_q_scores.extend(cs_label_pair)

    if not CROSS_DOMAIN:
        evalobj = evaluation.Evaluation(sorted_results)
        print("MAP: %s" % (evalobj.MAP(),))
        print("MRR: %s" % (evalobj.MRR(),))
        print("P@5: %s" % (evalobj.Precision(5),))
        print("P@1: %s" % (evalobj.Precision(1),))
    else:
        print('AUC: {:.4f}'.format(auc_met.value(0.05)))
def train_model(self):
    '''Main function for training model.

    For every epoch: one optimisation pass over self.train_loader, then a greedy roll-out over
    self.dev_loader in which the model's top-ranked action is followed through the navigation graph
    until it predicts 'stop' (or the step limit is hit), and the visited panorama ids are scored
    against the reference route.
    '''
    # Upper bound on roll-out actions; once exceeded the action is forced to 3 to end the episode.
    # NOTE(review): 3 is presumably ACTION_DICT['stop'] - confirm.
    max_actions = 36
    len_dev = len(self.dev_loader)

    # Training loop.
    self.model.train()
    for epoch in range(self.num_epochs):
        running_loss = 0.0
        logging.info("Epoch number: {}".format(epoch))
        print("epoch ", epoch)
        self.model.train()
        for batch_idx, batch in enumerate(self.train_loader):
            print(batch_idx)
            text = {key: val.to(self.device) for key, val in batch['text'].items()}
            images_features = batch['images_features'].to(self.device).squeeze(0)
            actions_target = batch['actions'].to(self.device).squeeze(0)
            actions_binary = batch['actions_binary'].to(self.device).squeeze(0)

            actions_output = self.model(text, images_features, actions_binary)
            loss = self.loss(actions_output, actions_target)

            self.optimizer.zero_grad()
            loss.backward()
            # accumulate a plain float: adding the tensor itself would keep every batch's
            # autograd graph alive until the end of the epoch
            running_loss += loss.item()
            self.optimizer.step()
        print("loss:", running_loss)

        # Evaluation on the dev set.
        self.model.eval()
        evaluator = evaluation.Evaluation(len_dev)
        with torch.no_grad():
            for batch_idx, batch in enumerate(self.dev_loader):
                print(batch_idx)
                text = {key: val.to(self.device) for key, val in batch['text'].items()}
                images_feature = batch['images_features'][:, 0].to(self.device)
                route_panoids = [x[0] for x in batch['route_panoids']]
                first_panoid = route_panoids[0]

                panormid_pred = [first_panoid]
                action_list = []
                curr_state = (first_panoid, 0)
                action = -1
                action_binary = torch.zeros(4)

                while action != ACTION_DICT['stop']:
                    actions_output = self.model(text, images_feature,
                                                action_binary.unsqueeze(0).to(self.device))
                    _, topk = actions_output.data.topk(2)
                    ranked_actions = topk.squeeze().tolist()
                    action = ranked_actions[0]
                    new_state = navigator._get_next_graph_state(curr_state, ACTION_DICT_IDX[action])

                    if action == ACTION_DICT['forward']:
                        if new_state[0] == curr_state[0]:
                            # Couldn't move forward: fall back to the second-ranked action.
                            action = ranked_actions[1]
                            new_state = navigator._get_next_graph_state(curr_state,
                                                                        ACTION_DICT_IDX[action])
                        else:
                            panormid_pred.append(new_state[0])

                    # Load the image features of the panorama the agent is in now.
                    curr_state = new_state
                    panoid = curr_state[0]
                    feature_path = path.abspath(path.join(MAIN_DIR, 'features', panoid + '.pt'))
                    images_feature = torch.load(feature_path).to(self.device)

                    # One-hot encoding of the action just taken is the next step's input.
                    action_list.append(action)
                    action_binary = torch.zeros(4)
                    action_binary[action] = 1

                    if len(action_list) > max_actions:
                        action = 3
                        print("!!!!!")

                evaluator.evaluate_single_sample(panormid_pred, route_panoids)
        evaluator.performance_info()
    print("END")
def simulatedSolution(self):
    """
    Simulate the schedule once per dispatching rule and collect the evaluation of each run.

    For every rule index the simulator state is re-initialised, the earliest event is assigned to a
    randomly chosen machine of its machine set, and the event loop runs until no operation is left
    unfinished. The evaluation of the resulting solution is appended to "result.txt" as one
    tab-separated line and stored in the returned list.

    Returns a list with one entry per rule: [rule id, mal[0], mal[1][0], mal[1][1]] where mal is the
    value returned by EV.Evaluation(self.solution). The result is consumed by the GA.
    """
    # The number of runs depends on the number of dispatching rules; it is read once, before the
    # first re-initialisation below.
    rule_count = len(self.GeneticRules)
    Result = [[] for _ in range(rule_count)]

    # the context manager closes the file even when a simulation run raises
    with open("result.txt", "a") as Resultfile:
        for dRulesID in range(rule_count):
            # reset the whole simulator state for this rule
            self.__init__(self.problem, self.solution, self.visitindex, self.GeneticRules,
                          self.extraction)
            self.initialization()
            self.nextTime = self.currentTime

            # The event with the smallest third field is taken first; its job (z[0]) / operation (z[1])
            # is assigned to a randomly picked machine of its machine set.
            z = min(self.nextEventsSet, key=lambda tup: tup[2])
            operation = self.problem.jobs[z[0]].operations[z[1]]
            a = np.random.randint(0, len(operation.machineSet))
            assignedMachineId = operation.machineSet[a].id
            self.assignment.append([z[0], z[1], assignedMachineId, 'r'])

            machine = self.solution.machines[assignedMachineId]
            machine.assigmentOperation.append(z[:2])
            machine.mwlm.append(z[:2])
            machine.mwlwm += operation.processingTimes[a]

            self.update(self.assignment)

            # Drop the events scheduled for the current time. A copy is iterated because removing from
            # the collection being iterated skips the element that follows each removed one.
            for event in list(self.nextEventsSet):
                if event[2] == self.currentTime:
                    self.nextEventsSet.remove(event)

            # main event loop: advance the clock until every operation is finished
            while len(self.notFinishedOpSet) > 0:
                self.findNextTimeandEvents()
                self.currentTime = self.nextTime
                if len(self.releasedOpSet) > 0:
                    self.LeastWaitingTimeAssignment()
                lastStarted = self.updateMachineSet(self.lastAssigned, self.machineEventSet, dRulesID)
                self.lastAssigned = list()
                if len(lastStarted) > 0:
                    self.update(lastStarted)

            mal = EV.Evaluation(self.solution)
            record = [dRulesID, mal[0], mal[1][0], mal[1][1]]
            Resultfile.write("\t".join(str(value) for value in record) + "\n")
            Result[dRulesID] += record  # used by the GA

    return Result  # used by the GA
def train_and_dev(self, sess, x, y, seq_len, batch_size, test_x, test_y, test_seq_len, epoch_size, data=None):
    """
    Train the model for epoch_size epochs and evaluate on the dev set after every epoch.

    Every epoch iterates shuffled mini-batches of (x, y, seq_len) together with the auxiliary train inputs
    taken from data, runs the train op and prints the average cost and the train accuracy. The learning
    rate starts at 1e-3 and is divided by 10 after the 2nd and after the 4th epoch. After each epoch
    the dev accuracy is computed in batches of 200; whenever it improves on the best accuracy so far,
    a checkpoint is saved, predictions are written out and the external evaluation is run.

    :param sess: the session used for every run call
    :param x, y, seq_len: train inputs, labels and sequence lengths
    :param batch_size: train mini-batch size
    :param test_x, test_y, test_seq_len: dev inputs, labels and sequence lengths
    :param epoch_size: number of epochs to run
    :param data: object holding the auxiliary inputs (train_aux_* / dev_aux_* attributes). Although it
                 defaults to None it is required: its attributes are read unconditionally.
    :return: None. Errors raised inside the training loop are reported on stdout, not re-raised.
    """
    learning_rate = 1e-3
    train_aux_xs = data.train_aux_xs
    train_aux_xs_len = data.train_aux_xs_len
    train_aux_len = data.train_aux_len
    test_aux_xs = data.dev_aux_xs
    test_aux_xs_len = data.dev_aux_xs_len
    test_aux_len = data.dev_aux_len
    best_acc = 0.0
    try:
        for epoch in range(epoch_size):
            total_cost = 0.0
            total_batch = 0
            total_acc_num = 0
            for batch_x, batch_y, batch_len, batch_xs, batch_xs_len, batch_aux_len in batch_iter(
                    x, y, seq_len, batch_size, train_aux_xs, train_aux_xs_len, train_aux_len,
                    shuffle=True):
                total_batch += 1
                _, cost_val, acc_cnt = sess.run(
                    [self.train_op, self.cost, self.acc_cnt],
                    feed_dict={
                        self.in_x: batch_x,
                        self.in_y: batch_y,
                        self.in_len: batch_len,
                        self.aux_xs: batch_xs,
                        self.aux_xs_len: batch_xs_len,
                        self.aux_len: batch_aux_len,
                        self.learning_rate: learning_rate,
                        self.dropout_rate: 0.0
                    })
                total_acc_num += acc_cnt
                total_cost += cost_val
                if total_batch % 30 == 0:
                    print('batch_%d cost_val: %0.5f' % (total_batch, cost_val))
            print('Epoch:', '%02d' % (epoch + 1),
                  'cost_avg =', '%0.5f' % (total_cost / total_batch),
                  'acc: %0.5f' % (total_acc_num / (0.0 + len(x))))

            # learning-rate schedule: divide by 10 after the 2nd and after the 4th epoch
            if epoch < 4 and epoch % 2 == 1:
                learning_rate /= 10.
                print('drop learning rate, Epoch:{} - {}'.format(epoch + 1, learning_rate))

            # dev-set accuracy of the current weights
            total_acc_num_test = 0
            for batch_test_x, batch_test_y, batch_len, batch_xs, batch_xs_len, batch_aux_len in batch_iter(
                    test_x, test_y, test_seq_len, 200, test_aux_xs, test_aux_xs_len, test_aux_len):
                acc_cnt_test = sess.run(self.acc_cnt,
                                        feed_dict={
                                            self.in_x: batch_test_x,
                                            self.in_y: batch_test_y,
                                            self.in_len: batch_len,
                                            self.aux_xs: batch_xs,
                                            self.aux_xs_len: batch_xs_len,
                                            self.aux_len: batch_aux_len,
                                            self.dropout_rate: 0.0
                                        })
                total_acc_num_test += acc_cnt_test
            cur_acc = total_acc_num_test / (0.0 + len(test_x))
            print('test acc: {:.5f}'.format(cur_acc))

            # keep a checkpoint and refresh the prediction file only for the best dev accuracy so far
            if best_acc < cur_acc:
                best_acc = cur_acc
                self.saver.save(sess, config.save_dir + '/rcnn_saver.ckpt', global_step=epoch + 1)
                probs, preds = self.predict(sess, data, False)
                self.outputs(preds)
                evaluation.Evaluation(
                    config.dev_file,
                    config.dev_predict_file.format(self.params.model_name))
    except Exception as exc:
        # Report the error that actually occurred; printing the Exception class itself only ever
        # showed "<class 'Exception'>" and hid the cause.
        print('{}: {}'.format(type(exc).__name__, exc))
def main():
    """
    Find variation sets in every benchmark corpus, evaluate them against the gold annotation and print
    the fuzzy / strict precision, recall and F1 per corpus followed by their averages over all corpora.

    The command line is decoded by decode_args(): args[0] selects the matching strategy ("anch" for
    anchor matching, anything else for incremental matching), args[1] is the window size and
    args[2..4] are passed on to the matching function unchanged.
    """
    args = decode_args(sys.argv)

    # (plain text directory, gold annotation directory) of every corpus
    to_dos = [("DATA/Swedish_MINGLE_dataset/plain/1", "DATA/Swedish_MINGLE_dataset/GOLD/1"),
              ("DATA/Swedish_MINGLE_dataset/plain/2", "DATA/Swedish_MINGLE_dataset/GOLD/2"),
              ("DATA/Swedish_MINGLE_dataset/plain/3", "DATA/Swedish_MINGLE_dataset/GOLD/3"),
              ("DATA/Swedish_MINGLE_dataset/plain/4", "DATA/Swedish_MINGLE_dataset/GOLD/4"),
              ("DATA/Childes/GOLD/French/cleaned/plain", "DATA/Childes/GOLD/French/cleaned/annotated")]

    # attribute names read from every Evaluation object, in the order they are collected
    metric_names = ('fuzzy_precision', 'strict_precision', 'fuzzy_recall',
                    'strict_recall', 'fuzzy_f1', 'strict_f1')
    scores = {name: [] for name in metric_names}

    def average(name):
        # mean of one metric over all corpora
        values = scores[name]
        return sum(values) / len(values)

    for plain_dir, gold_dir in to_dos:
        print("Finding variation sets in " + plain_dir)
        u = utterances.Utterances(plain_dir, gold_dir)
        gold_utterances = u._goldutterances

        # split every utterance into (tokens of field 2) and (fields 0 and 1 as its id)
        utterances_reformatted = []
        utterance_ids = []
        for utterance in u._utterances:
            utterances_reformatted.append(utterance[2].split())
            utterance_ids.append((utterance[0], utterance[1]))

        utt_iter = window(utterances_reformatted, args[1])
        ids = list(window(utterance_ids, args[1]))

        if args[0] == "anch":
            ids_and_matches = matches_anchor(utt_iter, args[2], args[3], args[4], False, ids)
        else:
            ids_and_matches = matches_incremental(utt_iter, args[2], args[3], args[4], False, ids)
        combined = convert_varseta_format(ids_and_matches)

        varseta_eval = evaluation.Evaluation(combined, gold_utterances)
        for name in metric_names:
            scores[name].append(getattr(varseta_eval, name))

        print('\tFuzzy Precision: {:0.2f}'.format(varseta_eval.fuzzy_precision))
        print('\tFuzzy Recall: {:0.2f}'.format(varseta_eval.fuzzy_recall))
        print('\tFuzzy F1: {:0.2f}'.format(varseta_eval.fuzzy_f1))
        print('')
        print('\tStrict Precision: {:0.2f}'.format(varseta_eval.strict_precision))
        print('\tStrict Recall: {:0.2f}'.format(varseta_eval.strict_recall))
        print('\tStrict F1: {:0.2f}'.format(varseta_eval.strict_f1))
        print('\n')

    print('Average Scores:')
    print('Average Fuzzy Precision: {:0.2f}'.format(average('fuzzy_precision')))
    print('Average Fuzzy Recall: {:0.2f}'.format(average('fuzzy_recall')))
    print('Average Fuzzy F1: {:0.2f}'.format(average('fuzzy_f1')))
    print('')
    print('Average Strict Precision: {:0.2f}'.format(average('strict_precision')))
    print('Average Strict Recall: {:0.2f}'.format(average('strict_recall')))
    print('Average Strict F1: {:0.2f}'.format(average('strict_f1')))
# valid information valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset) similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True) # merge all summaries merged = tf.summary.merge_all() # create a saver saver = tf.train.Saver() # analogy evaluator evaluator = evaluation.Evaluation(normalized_embeddings, W2I, questions) # global init init = tf.global_variables_initializer() """ Training procedure """ num_steps = int(math.ceil( data.shape[0] / BATCH_SIZE)) * NB_EPOCHS # num steps to train data_index = 0 # determine which batch to extract average_loss = 0 # overall loss value nth_epoch = 1 with tf.Session(graph=graph) as session: # open a writer to write summaries. writer = tf.summary.FileWriter(TMP_DIR, session.graph)
def launcher(
        group_by,
        path_list='/sequoia/data2/gcheron/UCF101/detection',
        path_info='/sequoia/data2/gcheron/UCF101/detection/mytracksK5_600k',
        path_tracks='/sequoia/data2/gcheron/pytorch/diffrac_action_localization/UCF101/results/mytracksK5_600k/tracks/',
        prepend_name='UCF101_spatiotemporal_at_least_one_gb',
        feat_type='RGB+OPF',
        sp_iou_thresh=0.3,
        write_eval=False,
        n_iterations=10000,
        alpha=1e-4,
        n_actions=25,
        cache_dir='/sequoia/data2/jalayrac/nips2017weakpose/cache/',
        res_dir='/sequoia/data2/jalayrac/nips2017weakpose/',
        cstrs_name='at_least_one_per_instance_unit_time',
        exp_suffix='',
        rdm_seed=19,
        path_log_eval=None,
        save_pr_curves=False,
        eval_frequency=500,
        val_eval=False,
        video_eval_args=None,
        video_eval_only=False,
        calibrate=False,
        use_calibration=False,
        no_init=False,
        no_feat_init=False):
    """Prepare inputs, run (or reload) the BCFW solver and optionally evaluate.

    Steps, in order: load features and ground truth, load or compute the
    per-video constraints, load or draw a random initial assignment, load
    cached weights or run ``bcfw_diffrac.solver``, optionally write per-video
    evaluation data, and optionally build the video-level evaluation object.
    Intermediate results are cached under ``cache_dir`` and ``res_dir``.

    Args:
        group_by: Forwarded to the feature loader and the constraint builder;
            also embedded in cache and experiment names.
        path_list: Directory containing the ``OF_vidlist_*1.txt`` video lists;
            also forwarded to the feature loader.
        path_info: Forwarded to the feature loader and ``data_handler``.
        path_tracks: Forwarded to the feature loader and ``data_handler``.
        prepend_name: Prefix of cache and experiment names.
        feat_type: Forwarded to the loaders; part of the experiment name.
        sp_iou_thresh: Forwarded to the feature loader; part of the names.
        write_eval: When true (and ``video_eval_only`` is false), write
            per-video evaluation data and build the evaluation object.
        n_iterations: Passed to the solver; part of the experiment name.
        alpha: Passed to the solver; part of the experiment name.
        n_actions: Forwarded to the loaders; ``n_actions - 1`` is used for
            the MAP evaluation function and the calibration threshold rows.
        cache_dir: Directory for the cached constraints (``.npy``) and the
            cached initial assignment (``.pkl``).
        res_dir: Root directory under which the experiment directory lives.
        cstrs_name: Constraint type; must be one of the supported names
            whenever initialization is required.
        exp_suffix: Appended to the experiment name.
        rdm_seed: Seed given to ``np.random.seed``.
        path_log_eval: If not ``None``, joined with the experiment name and
            passed as ``path_save`` to ``eval_functions.MAP``.
        save_pr_curves: Passed to ``eval_functions.MAP``.
        eval_frequency: Passed to the solver.
        val_eval: When true, also load the ``'test'`` features and hand them
            to the solver as ``validation_info``.
        video_eval_args: Mapping read for the keys ``'iou'``, ``'loc_th'``,
            ``'datasetname'`` and ``'track_class_agnostic'``; required when
            ``write_eval`` or ``video_eval_only`` is true.
        video_eval_only: Skip loading, initialization, training and writing;
            only build the evaluation object.
        calibrate: Use mode ``'val'`` instead of ``'train'``; together with
            ``use_calibration=False`` it selects the validation video list.
        use_calibration: Load ``calibration.pkl`` from the validation
            experiment directory and use it as localization thresholds.
        no_init: Skip constraint and assignment initialization.
        no_feat_init: Skip feature loading.

    Returns:
        The ``evaluation.Evaluation`` instance when ``write_eval`` or
        ``video_eval_only`` is true, otherwise ``None``.

    Raises:
        ValueError: If initialization is needed and ``cstrs_name`` is not a
            supported constraint name.
        AssertionError: If ``BIAS_VALUE`` is not 100, or a video has no
            constraint available.
    """
    np.random.seed(rdm_seed)
    bias_value = BIAS_VALUE
    assert bias_value == 100, 'This value is hard coded in evaluation'

    need_init = not (video_eval_only or no_init)
    need_feat_init = not (video_eval_only or no_feat_init)
    mode = 'val' if calibrate else 'train'

    # Returned as-is when no evaluation object is requested, instead of
    # leaving the name unbound at the final return.
    ev = None

    # Get the features and GT.
    validation_info = None
    if need_feat_init:
        feats_train, labels_train, list_vid = load_feats_and_gts(
            mode, path_list, prepend_name, feat_type, group_by, sp_iou_thresh,
            cache_dir, path_tracks, path_info, n_actions)
        if val_eval:
            feats_val, labels_val, _ = load_feats_and_gts(
                'test', path_list, prepend_name, feat_type, group_by,
                sp_iou_thresh, cache_dir, path_tracks, path_info, n_actions)
            validation_info = {'gts': labels_val, 'feats': feats_val}

    # Get the at least one constraints for the training set.
    print('Dealing with constraints for {} set'.format(mode))
    name_cstrs = 'cstrs_{}_{}_gb_{}_spth_{}_{}'.format(prepend_name,
                                                       cstrs_name, group_by,
                                                       sp_iou_thresh, mode)
    path_cstrs = os.path.join(cache_dir, name_cstrs + '.npy')
    fn_cstrs = 'get_{}_cstr_for_video'.format(cstrs_name)

    compute_cstrs = True
    if not need_init:
        compute_cstrs = False
    elif os.path.isfile(path_cstrs):
        # Reuse the constraints cached by a previous run.
        cstrs_train = np.load(path_cstrs).tolist()
        compute_cstrs = False

    if compute_cstrs:
        # For each video get the data.
        cstrs = []
        for video_name in tqdm(list_vid):
            present, constraint = data_handler.get_general_cstr_for_video(
                video_name,
                groupby=group_by,
                path_info=path_info,
                path_tracks=path_tracks,
                n_actions=n_actions,
                constr_fn_name=fn_cstrs)
            assert present, (
                '{} is missing: please provide a list were all videos have gt/features/labels...'
            ).format(video_name)
            cstrs.append(constraint)
        print('Saving constraints...')
        np.save(path_cstrs, cstrs)
        cstrs_train = cstrs

    # Generate random init.
    print('Generating a random Y init...')
    asgn_train = []
    list_at_least_one_cstr = [
        'at_least_one_per_instance',
        'at_least_one_per_instance_unit_time',
        'fully_supervised',
        'spot_on',
        'at_least_one_per_temporal_point_unit_time',
        'at_least_one_per_instance_unit_time_with_keyframes',
        'at_least_one_per_temporal_point_unit_time_with_keyframes',
        'at_least_one_clip_level',
        'at_least_one_shot_level',
    ]
    if not need_init:
        at_least_one_solver = None
    elif cstrs_name in list_at_least_one_cstr:
        at_least_one_solver = linear_solvers.AtLeastOneSolver()
    else:
        raise ValueError(
            'cstrs_name: {} is not a valid option.'.format(cstrs_name))

    name_cstrs_init = 'cstrs_init_{}_{}_gb_{}_spth_{}_{}'.format(
        prepend_name, cstrs_name, group_by, sp_iou_thresh, mode)
    path_cstrs_init = os.path.join(cache_dir, name_cstrs_init + '.pkl')
    if need_init:
        if os.path.isfile(path_cstrs_init):
            # NOTE: pickle can execute arbitrary code on load; only point
            # cache_dir at files produced by this pipeline.
            with open(path_cstrs_init, 'rb') as f_cstrs:
                cstrs_init_dict = pickle.load(f_cstrs)
            asgn_train = cstrs_init_dict['asgn_train']
        else:
            n_rdm_init = 5
            for y, cstr in tqdm(zip(labels_train, cstrs_train)):
                # Average the solver outputs obtained for several random
                # gradients to get the initial assignment of this video.
                asgn_init = np.zeros(y.shape)
                for _ in range(n_rdm_init):
                    rand_grad = np.random.randn(y.shape[0], y.shape[1])
                    asgn_init += 1.0 / n_rdm_init * at_least_one_solver.solve(
                        cstr, rand_grad)
                asgn_train.append(asgn_init)
            # Save to path.
            with open(path_cstrs_init, 'wb') as f_cstrs:
                pickle.dump({'asgn_train': asgn_train}, f_cstrs)

    # Create the experiment name.
    exp_name = '{}_{}_niter_{}_gb_{}_{}_spth_{}_lambda_{}_pratio_{}_{}_slack_{}_beta_{}_delta_{}{}'.format(
        prepend_name, cstrs_name, n_iterations, group_by, mode, sp_iou_thresh,
        alpha, -1, feat_type, False, -1.0, -1.0, exp_suffix)
    exp_name_val = re.sub('train_spth_', 'val_spth_', exp_name)

    # Set out dirs/paths.
    path_out = os.path.join(res_dir, exp_name)
    path_out_val = os.path.join(res_dir, exp_name_val)
    path_asgn = os.path.join(path_out, 'asgn.npy')
    path_w = os.path.join(path_out, 'w.pkl')
    call('mkdir -p {}'.format(path_out), shell=True)

    # Get a classifier.
    if video_eval_only:
        weights = None
    else:
        test_fun = None
        if os.path.isfile(path_w):
            # The file is written with 'wb' below, so read it back in
            # binary mode as well.
            with open(path_w, 'rb') as f_w:
                weights = pickle.load(f_w)
        else:
            # Launch FW optim.
            path_log_eval = os.path.join(
                path_log_eval, exp_name) if path_log_eval is not None else None
            print('Launching the BCFW optim...')
            asgn_final, weights = bcfw_diffrac.solver(
                feats_train,
                asgn_train,
                cstrs_train,
                at_least_one_solver,
                gts=labels_train,
                alpha=alpha,
                verbose='normal',
                bias_value=bias_value,
                block_sampling='gap_sampling',
                n_iterations=n_iterations,
                objective_frequency=250,
                eval_frequency=eval_frequency,
                eval_function=eval_functions.MAP(
                    n_actions - 1,
                    path_save=path_log_eval,
                    save_pr_curves=save_pr_curves),
                validation_info=validation_info)
            # Save assignment.
            np.save(path_asgn, asgn_final)
            # Save final classifier.
            with open(path_w, 'wb') as f_w:
                pickle.dump(weights, f_w)

    if calibrate and not use_calibration:
        # Need to test the validation videos to calibrate.
        list_file = '{}/OF_vidlist_{}1.txt'.format(path_list,
                                                   'valtrainRate10_val')
    else:
        list_file = '{}/OF_vidlist_{}1.txt'.format(path_list, 'test')

    # Evaluate on test data.
    if write_eval and not video_eval_only:
        print('Writing evaluation files in {}'.format(path_out))
        with open(list_file, 'r') as f_list:
            # The video name is the first space-separated field of each line.
            list_vid = [line.strip().split(' ')[0] for line in f_list]
        for video_name in tqdm(list_vid):
            data_handler.write_eval_data(
                video_name,
                weights,
                bias_value=bias_value,
                path_root_out=os.path.join(path_out, 'tracks'),
                path_info=path_info,
                path_tracks=path_tracks,
                feat_type=feat_type,
                n_actions=n_actions,
                test_fun=test_fun)

    if write_eval or video_eval_only:
        if use_calibration:
            calib_path = os.path.join(path_out_val, 'calibration.pkl')
            print('Load calibration from:\n{}'.format(calib_path))
            with open(calib_path, 'rb') as f:
                calibration = pickle.load(f)
            # Set one threshold per action, per IoU.
            loc_th = np.zeros((n_actions - 1, len(video_eval_args['iou'])))
            for i, iou in enumerate(video_eval_args['iou']):
                loc_th[:, i] = calibration[iou]
        else:
            loc_th = video_eval_args['loc_th']

        # Clear eventual existing evaluation cache.
        call('rm -rf {}/evaluation_cache'.format(path_out), shell=True)

        # Create instance to eval video mAP.
        ev = evaluation.Evaluation(
            video_eval_args['datasetname'], [path_out],
            list_file,
            video_eval_args['iou'],
            smooth_window=25,
            loc_th=loc_th,
            track_class_agnostic=video_eval_args['track_class_agnostic'],
            force_no_regressor=True,
            nthreads=8,
            one_th_per_iou=use_calibration)

    return ev
### PREPROCESSING ###
# Split `training_data` into train/test parts for the chosen target column.
# Possible targets: "Value", "Wage", "Release_Clause"
split_settings = dict(
    test_size=0.25,
    train_size=0.75,
    random_state=69,
    target_variable="Value",
)
preprocessor = preprocessing.PreProcessing()
X_train, x_test, Y_train, y_test = preprocessor.preprocess(
    data=training_data, **split_settings)

### MODEL BUILDING ###
# The same hyper-parameters are handed to `train_models`, which returns the
# three fitted models as a triple.
training_settings = dict(
    n_estimators=1000,
    max_depth=5,
    learning_rate=0.1,
    random_state=69,
    n_jobs=6,
)
modeller = model.Model()
trained_models = modeller.train_models(X_train, Y_train, **training_settings)
gbr_model, rfr_model, dtr_model = trained_models

### EVALUATION ###
# Score all three models on the held-out split and save the plots.
evaluator = evaluation.Evaluation()
evaluator.evaluate_models(
    gbr_model=gbr_model,
    rfr_model=rfr_model,
    dtr_model=dtr_model,
    x_test=x_test,
    y_test=y_test,
    save_plots=True,
)
def noteSegmentationFeatureExtraction(self,
                                      pitchtrackNoteFolderPath,
                                      featureVecFolderPath,
                                      pitchtrackNotePredictFolderPath,
                                      recordingNames,
                                      segCoef=0.2,
                                      predict=False,
                                      evaluation=False):
    '''
    Segment each recording's pitch track into notes, refine the segments and
    extract one feature vector per refined segment.

    For every recording this will
    1) segment the pitch track into notes whose boundaries are given by pYIN,
    2) refine the segmentation (extrema detection + segment refinement),
    3) calculate features on the refined segments and dump them to
       ``<featureVecFolderPath>/<recording>.json``.

    :param pitchtrackNoteFolderPath: folder holding ``<rn>_pitchtrack.csv`` and
        ``<rn>_monoNoteOut.csv`` (and, when evaluating, ``<rn>_coarseSeg.txt``)
    :param featureVecFolderPath: folder where the per-recording feature json
        files are written
    :param pitchtrackNotePredictFolderPath: folder where
        ``<rn>_refinedSegmentFeatures.json`` is written when ``predict`` is true
    :param recordingNames: iterable of recording names (file name stems)
    :param segCoef: coefficient passed to ``self.segmentPointDetection1``
    :param predict: when true, also compute the frame boundaries of the refined
        segments and write them, with the pitch contours, extremas and vibrato
        outputs, to the prediction folder
    :param evaluation: when true, compare the exported segments with the coarse
        segmentation through ``evalu.Evaluation`` and return its metrics
    :return: ``(COnOffF, COnF, OBOnRateGT, OBOffRateGT)`` as returned by
        ``evalu1.metrics`` when ``evaluation`` is true; otherwise nothing is
        returned explicitly (``None``)
    '''

    ############################################## segmentation ########################################################
    # The two commented lines below would run the segmentation while computing pYIN:
    # ptSeg = pitchtrackSegByNotes.pitchtrackSegByNotes(samplingFreq, frameSize, hopSize)
    # ptSeg.doSegmentation(pitchtrack, fs.m_oMonoNoteOut)

    ptSeg = pitchtrackSegByNotes.pitchtrackSegByNotes()

    if evaluation:
        evalu1 = evalu.Evaluation()  # evaluation object
        # Counters accumulated over all recordings; gt/st count the coarse
        # (ground-truth) segments and the exported segments respectively.
        COnOffall, COnall, OBOnall, OBOffall, gt, st = 0, 0, 0, 0, 0, 0

    for rn in recordingNames:
        pitchtrack_filename = os.path.join(pitchtrackNoteFolderPath,
                                           rn + '_pitchtrack.csv')
        monoNoteOut_filename = os.path.join(pitchtrackNoteFolderPath,
                                            rn + '_monoNoteOut.csv')

        ptSeg.doSegmentationForPyinVamp(pitchtrack_filename,
                                        monoNoteOut_filename)

        if evaluation:
            coarseSegmentation_filename = os.path.join(
                pitchtrackNoteFolderPath, rn + '_coarseSeg.txt')
            ptSeg.coarseSegmentation(
                monoNoteOut_filename,
                coarseSegmentation_filename)  # groundtruth segmentation

        # ptSeg.pltNotePitchtrack(saveFig=True, figFolder='../jingjuSegPic/laosheng/train/male_13/pos_3_midinote/')

        ###################### calculate the polynomial fitting coefs and vibrato frequency ############################
        # use pitch track ptSeg.pitchtrackByNotes from last step

        # All containers below are reset for every recording and keyed by the
        # 1-based index of the refined segment within that recording.
        featureDict = {}  # feature vectors dictionary
        segmentsExport = {}  # refined segmentation boundaries
        refinedPitchcontours = {}  # refined pitch contours
        extremas = {}  # extremas
        vibrato = {}
        curvefittingDeg = 1

        jj = 1  # running index of refined segments (feature side)
        jjNone = []  # indices of segments whose feature vector was rejected
        jjj = 1  # running index of refined segments (boundary side)
        for ii in range(len(ptSeg.pitchtrackByNotes)):
            pt = ptSeg.pitchtrackByNotes[ii][0]
            pt = np.array(pt, dtype=np.float32)
            x, y = self.normalizeNotePt(
                pt)  # normalise x to [0,1], remove y DC
            sbp = self.localMinMax(
                y)  # local minima and extrema of pitch track
            self.diffExtrema(
                x, y)  # the amplitude difference of minima and extrema
            self.segmentPointDetection1(
                segCoef)  # do the extrema segmentation here
            # nc1.segmentPointDetection2()  # segmentation point
            self.segmentRefinement(pt)  # do the refined segmentation

            # print self.refinedNotePts

            for rpt in self.refinedNotePts:
                # print jj
                featureVec, extrema, vibOut = self.featureExtractionProcess(
                    rpt, sbp, curvefittingDeg)
                # A falsy first feature marks the segment as unusable; its
                # index is remembered so that its boundary is skipped below.
                if featureVec[0]:
                    refinedPitchcontours[jj] = rpt.tolist()
                    featureDict[jj] = featureVec.tolist()
                    extremas[jj] = extrema
                    vibrato[jj] = vibOut
                else:
                    jjNone.append(jj)

                # this plot step is slow, if we only want the features, we can comment this line
                # nc1.pltRefinedNotePtFc(xRpt, yRpt, p, rsquare, polyVar, vibFreq, saveFig=True,
                #     figFolder='../jingjuSegPic/laosheng/train/refinedSegmentCurvefit/'+rn+'_curvefit_refined/',
                #     figNumber = jj)
                jj += 1

            if predict:
                # construct the segments frame vector: frame boundary of segments
                noteStartFrame = ptSeg.noteStartEndFrame[ii][0]
                noteEndFrame = ptSeg.noteStartEndFrame[ii][1]
                extremaInd = np.array(self.extrema)
                # Segment points are indices into the extrema; shift them by
                # the note start and add the note's own start/end frames.
                segmentsInd = extremaInd[self.segments] + noteStartFrame
                segmentsInd = np.insert(segmentsInd, 0, noteStartFrame)
                segmentsInd = np.append(
                    segmentsInd,
                    noteEndFrame) + 2  # +2 for sonicVisualizer alignment
                # segmentsExport[jjj] = str(segmentsInd)
                for kk in range(len(segmentsInd) - 1):
                    if jjj not in jjNone:
                        segmentsExport[jjj] = [
                            segmentsInd[kk], segmentsInd[kk + 1]
                        ]  # segmentation boundary
                    jjj += 1

        if evaluation:
            # evaluate segmentation
            # NOTE(review): segmentsExport is only filled when `predict` is
            # true, so evaluating with predict=False compares against an
            # empty set -- confirm this is intended.
            COnOff, COn, OBOn, OBOff = \
                evalu1.coarseEval(ptSeg.coarseSegmentationStartEndFrame,segmentsExport.values())
            COnOffall += COnOff
            COnall += COn
            OBOnall += OBOn
            OBOffall += OBOff
            gt += len(ptSeg.coarseSegmentationStartEndFrame)
            st += len(segmentsExport.values())

        # write feature into json
        featureFilename = os.path.join(featureVecFolderPath, '%s.json' % rn)
        with open(featureFilename, 'w') as outfile:
            json.dump(featureDict, outfile)

        if predict:
            # output segments boundary frames pitch contours
            outJsonDict = {
                'refinedPitchcontours': refinedPitchcontours,
                'boundary': segmentsExport,
                'extremas': extremas,
                'vibrato': vibrato
            }
            refined_segment_filename = os.path.join(
                pitchtrackNotePredictFolderPath,
                '%s_refinedSegmentFeatures.json' % rn)
            with open(refined_segment_filename, "w") as outfile:
                json.dump(outJsonDict, outfile)
                # for se in segmentsExport:
                #     # outfile.write(str(int(se[0]))+'\t'+str(se[1])+'\n')
                #     outfile.write(str(se)+'\n')

    if evaluation:
        # print COnOffall,COnall,OBOnall,OBOffall,gt,st
        # Turn the counters accumulated over all recordings into the final
        # evaluation metrics.
        COnOffF, COnF, OBOnRateGT, OBOffRateGT = evalu1.metrics(
            COnOffall, COnall, OBOnall, OBOffall, gt, st)
        return COnOffF, COnF, OBOnRateGT, OBOffRateGT