def __init__(self, Q_0, hypotheses, examples, eta, delta, eps,
             teacher_type, exp_iter, mu, top_k):
    """Store the experiment settings and build the learner/teacher pair.

    Every argument is kept on the instance under its own name. Two
    settings are not supplied by the caller: ``max_iter`` is fixed at 150
    and ``accumulator`` starts as an empty dict.
    """
    # Settings fixed by this class rather than by the caller.
    self.max_iter = 150
    self.accumulator = {}

    # Caller-supplied configuration, stored verbatim.
    self.Q_0, self.hypotheses, self.examples = Q_0, hypotheses, examples
    self.eta, self.delta, self.eps = eta, delta, eps
    self.teacher_type, self.exp_iter = teacher_type, exp_iter
    self.mu, self.top_k = mu, top_k

    # The learner gets the initial distribution, hypotheses and eta.
    self.learner = learner.learner(Q_0=Q_0, H=hypotheses, eta=eta)

    # The teacher additionally gets the examples and the teaching settings.
    teacher_settings = dict(
        H=hypotheses,
        examples=examples,
        Q_0=Q_0,
        eta_learner=eta,
        delta=delta,
        eps=eps,
        teacher_type=teacher_type,
        max_iter=self.max_iter,
        mu=self.mu,
        top_k=self.top_k,
    )
    self.teacher = teacher.teacher(**teacher_settings)
def end_to_end_teaching(self):
    """Ask the teacher for a target model, then run UCRL on the modified env.

    Steps: fetch ``M_0``, let the teacher derive ``M_t`` from it, build a
    modified environment from ``self.env`` and ``M_t``, print that
    environment's reward, then replace ``self.learner`` with a learner
    bound to the modified environment and run its ``UCRL`` routine.
    """
    # NOTE(review): `env` is a bare name here while `self.env` is used two
    # lines below -- confirm a module-level `env` exists and is intended.
    initial_model = env.get_M_0()

    # The second return value (feasibility) is not used by this method.
    target_model, _feasible = self.teacher.get_target_M(initial_model)

    modified_env = self.modify_env(self.env, target_model)
    print(modified_env.reward)

    self.learner = learner.learner(modified_env)
    self.learner.UCRL(
        alpha=self.alpha_UCRL,
        n_rounds=self.T_UCRL,
        pi_no_attack=self.pi_no_attack,
        M_0=initial_model,
        M_t=target_model,
        cost_p=self.attackers_cost_p,
    )
def __init__(self, H, examples, Q_0, eta_learner, delta, eps, teacher_type,
             max_iter, mu, top_k=10):
    """Set up a teacher over hypothesis class ``H``.

    Args:
        H: the hypotheses; indexed to obtain ``h_star``.
        examples: the full example pool.
        Q_0: initial distribution; a deep copy seeds ``Q_of_h_given_S``.
        eta_learner: the learner's eta.
        delta: offset applied to eta for the ``plus_delta`` /
            ``minus_delta`` teacher types.
        eps, max_iter, mu, top_k: stored unchanged on the instance.
        teacher_type: one of ``greedy``, ``plus_delta``, ``minus_delta``,
            ``random``, ``approximate_Q``, ``noise_feature``,
            ``limited_ground_truth``.

    Raises:
        SystemExit: with status 1 when ``teacher_type`` is not recognised.
            (This used to exit with status 0, which reported success to
            the calling shell.)
    """
    self.H = H
    self.examples = examples
    self.Q_0 = Q_0
    self.Q_of_h_given_S = copy.deepcopy(Q_0)
    self.eta_learner = eta_learner
    self.delta = delta
    self.teacher_type = teacher_type

    # Teacher types whose internal eta equals the learner's eta.
    same_eta_types = ("greedy", "random", "approximate_Q",
                      "noise_feature", "limited_ground_truth")
    if teacher_type in same_eta_types:
        self.eta_teacher = eta_learner
    elif teacher_type == "plus_delta":
        self.eta_teacher = eta_learner + self.delta
    elif teacher_type == "minus_delta":
        self.eta_teacher = eta_learner - self.delta
    else:
        print("Wrong teacher type -- ", self.teacher_type)
        print(
            "Please choose one of the following: [greedy, plus_delta, "
            "minus_delta, random, approximate_Q, noise_feature, "
            "limited_ground_truth]"
        )
        # Non-zero status: this is a configuration error, not a clean exit.
        raise SystemExit(1)

    # The teacher's own model of the learner uses the teacher's eta.
    self.learner = learner.learner(Q_0=Q_0, H=H, eta=self.eta_teacher)
    self.eps = eps
    self.max_iter = max_iter
    self.mu = mu
    self.top_k = top_k

    self.h_star_index = utils.find_h_star_index_given_examples(H, examples)
    self.h_star = self.H[self.h_star_index]
    self.examples_train = utils.remove_all_inconsistent_examples(
        self.h_star, examples)
    self.list_of_error_h = utils.error_for_every_h(H, self.examples_train)

    # Only the approximate_Q teacher replaces Q_0 with a perturbed version.
    # This happens after the learner above and Q_of_h_given_S were built
    # from the unperturbed Q_0.
    if teacher_type == "approximate_Q":
        self.Q_0 = self.get_perturbed_initial_distribution()
    self.C_eps = self.get_C_eps()
def compare_neighbor(self, y0, yn, var=NONE):
    """Compare two full-size images over neighbourhood areas.

    A fresh estimator is built from ``y0``'s first two dimensions and a
    ``[4, 4]`` setting, then updated with channels 0, 1 and 2 of both
    images. The best (maximum) entry of every row of
    ``neighbor_similarity_flat`` is collected and averaged.

    Args:
        y0: reference image, indexed as ``y0[:, :, channel]``.
        yn: image to compare, indexed the same way.
        var: optional weights; when given, ``var.flat`` multiplies the
            best similarities before averaging.

    Returns:
        The mean of the (optionally weighted) best similarities.
    """
    estr = learner((y0.shape[1], y0.shape[0]), [4, 4]).new_estimator()
    for channel in range(3):
        estr.update(y0[:, :, channel], yn[:, :, channel])

    similarities = estr.neighbor_similarity_flat
    n = len(similarities)

    # Float (n, 1) buffer. The previous np.array([[0]] * n) was an integer
    # array, so fractional similarity values were truncated on assignment.
    best_sim = np.zeros((n, 1), dtype=float)
    for k in range(n):
        best_sim[k] = max(similarities[k])

    # Identity test against the sentinel: `var != NONE` is evaluated
    # elementwise when `var` is an array and cannot be used as a condition.
    if var is not NONE:
        best_sim = best_sim * var.flat
    return np.mean(best_sim)
def dataProcess(self, file, data_path, model, feature_selector,
                normalize=False, missingValueTreatment=False,
                featureSelect=False):
    """Load a CSV, clean it up and write the result next to the data.

    Args:
        file: path of the CSV file to read.
        data_path: folder that receives ``processed_data.pkl`` and
            ``processed_data.csv``.
        model: stored on ``self.model``; passed to ``selectedLearner``
            when ``featureSelect`` is set.
        feature_selector: forwarded to ``self.featureSelction``.
        normalize: run ``self.DataNormalize`` on everything except the
            class column.
        missingValueTreatment: run ``self.imputation`` on the data.
        featureSelect: run feature selection.
    """
    self.excel_data = pd.read_csv(file)
    self.model = model
    self.headers = self.excel_data.columns.values.tolist()

    # Classify each column by the marker substrings configured on self.
    for column in self.headers:
        if self.header_ignore in column:
            # Ignored columns are dropped outright.
            self.excel_data.drop([column], axis=1, inplace=True)
        if self.normalization_ignore in column:
            # Parked in self.df so normalisation does not touch them.
            self.df[column] = self.excel_data[column]
            self.excel_data.drop([column], axis=1, inplace=True)
        if self.class_s in column:
            self.class_label = column

    self.processed_headers = self.excel_data.columns.values.tolist()

    if missingValueTreatment:
        imputed = self.imputation(self.excel_data)
        self.excel_data = pd.DataFrame(data=imputed,
                                       columns=self.processed_headers)

    if normalize:
        # Take the class column out, normalise the rest, then put the
        # parked columns and the class column back.
        self.dependent = self.excel_data[self.class_label]
        self.excel_data.drop([self.class_label], axis=1, inplace=True)
        normalized = self.DataNormalize(self.excel_data)
        self.excel_data = pd.concat([normalized, self.df], axis=1)
        self.excel_data[self.class_label] = self.dependent

    if featureSelect:
        self.clf = learner.learner(2).selectedLearner(self.model)
        self.excel_data = self.featureSelction(self.excel_data,
                                               feature_selector)

    missing = self.excel_data.isnull()
    if missing.values.any():
        print("There is blank cells, please check..")
        print(missing.sum())

    pickle_target = os.path.join(data_path, "processed_data.pkl")
    csv_target = os.path.join(data_path, "processed_data.csv")
    self.excel_data.to_pickle(pickle_target)
    self.excel_data.to_csv(csv_target)
def main():
    """Run the learner on the hard-coded satellite domain and problem.

    The Q-table file name is derived from the problem name found in the
    problem file (``<name>_Q_Table``), the table is loaded from that
    file, and a simulator run with ``learner.learner(q)`` is printed.

    The execute mode ("-E") and the atexit-based saving that used to be
    selected through ``sys.argv`` are currently disabled; only the learn
    path runs.
    """
    # Alternative inputs: the rover domain/problem, or sys.argv[2] / [3].
    domain_path = "doms and probs/satellite_domain_multi.pddl"
    problem_path = "doms and probs/satellite_problem_multi.pddl"

    prefix = "(define (problem "

    # Find the problem and the world in the problem name. Context managers
    # close the files even when reading fails part-way.
    with open(problem_path, 'r') as problem_file:
        policy_name = problem_file.readline().replace(prefix, "")
        while policy_name == "\n":  # skip leading blank lines
            policy_name = problem_file.readline().replace(prefix, "")
        policy_name = policy_name[0:policy_name.find(")")] + "_Q_Table"

        # Re-read the whole file for the transparency check.
        problem_file.seek(0)
        is_transparent = problem_file.read().find("reveal") == -1

    with open(domain_path) as domain_file:
        is_deterministic = domain_file.read().find("probabilistic") == -1

    # NOTE: is_deterministic / is_transparent are only consumed by the
    # disabled execute-mode branch; they are kept so it can be restored.

    # Load or create the Q-table.
    q = q_table.q_table()
    q.build_from_file(policy_name)

    # Learn mode. print(...) with one argument behaves the same on
    # Python 2 and Python 3.
    print(LocalSimulator().run(domain_path, problem_path, learner.learner(q)))
import datagate
import learner
import evaluator
import sys

NumFeat = 46
NumberOfGames = int(sys.argv[1])
pastdata = datagate.getdata(NumberOfGames)

# Learning step: learner.py applies linear regression and returns a vector
# of coefficients.
print("Begin learning...")
coe = learner.learner(pastdata, NumFeat)
print(coe)

# Manual check of the learned coefficients: read space-separated integer
# vectors from stdin and evaluate each one. A vector starting with -1
# ends the session.
while True:
    inp = [int(token) for token in input().split(" ")]
    if inp[0] == -1:
        break
    if len(inp) == NumFeat:
        print(evaluator.evaluator(coe, inp, NumFeat))
    else:
        print("Not a legal vector\n")
#Scale the features so each vector is of unit modulus textData.bagofwords = textData.tfidf_df.apply(lambda x: x/np.linalg.norm(x),1) #Include dummy variables for each class label in the dataframe #####1 - Positive##### #####0 - Negative##### textData.bagofwords["classLabel"] = pd.get_dummies(all_data['sentiment'])['pos'] #Include the original text in the tfidf-dataframe textData.bagofwords["origText"] = all_data.text #Choose 1 from each class- positive & negative labeled_data = textData.bagofwords.loc[[0,500]] #Shuffle the remaining dataset shuffle = textData.bagofwords.loc[np.random.permutation(textData.bagofwords[~textData.bagofwords.index.isin([0,500])].index)] #Use 150 for the pool of unlabeled, and 50 for the test data unlabeled_data = shuffle[0:150] test_data = shuffle[150::] data1 = machine_learning.ActiveLearningDataset(labeled_data,classLabel="classLabel",origText="origText") data2 = machine_learning.ActiveLearningDataset(unlabeled_data,classLabel="classLabel",origText="origText") data3 = machine_learning.ActiveLearningDataset(test_data,classLabel="classLabel",origText="origText") active_learner = learner.learner(data1,test_datasets=data3,probability=0,NBC=True) length = len(data1.data) active_learner.pick_initial_training_set(length) active_learner.rebuild_models(undersample_first=True) active_learner.unlabeled_datasets.add_data(data2.data) active_learner.active_learn(10, num_to_label_at_each_iteration=2)
def __init__(self):
    """Create a chess environment and immediately train a learner on it.

    Construction does not return until ``learn()`` has finished.
    """
    environment = chess_env()
    self.env = environment
    self.learner = learner(environment)
    self.learner.learn()
# svm.set_data(XTrain, YTrain) # svm.set_kernel('rbf') # svm.make_kernel_matrix(gamma=gamma) # svm.train(c) # RQS = svm.test_error(XVal, YVal) # if RQS<BestRSQ: # BestC=c # BestRSQ=RQS # # # get test error print "SVM -Pegasus" # Pegasus: Bestgamma = 0 Bestmax_epochs=0 BestRSQ = float('Inf') for gamma in range(1,5): for max_epochs in range (100,102): svm = learner() svm.set_data(XTrain, YTrain) svm.set_kernel('rbf') svm.make_kernel_matrix(gamma=gamma) svm.train_pegasos_kernelized(1, max_epochs) RQS = svm.test_error(XVal, YVal) if RQS<BestRSQ: Bestgamma=gamma Bestmax_epochs=max_epochs BestRSQ=RQS # get test error
def main():
    """Train one network per mass value and plot the resulting errors.

    For every mass the synapses are re-initialised, then for a fixed
    number of iterations an action is applied through ``lnr.learner`` and
    the next action is chosen by ``critic_actor_v1``. Two figures are
    produced: the training error for every 15th mass, and the standard
    deviation of the per-action usage counts against mass.
    """
    # For the first time initialize random weights for the synapses;
    # otherwise use the output synapses from run N for run N+1.
    hls = [50, 100, 140]
    input_size = 2
    num_of_iterations = 1000
    mass = np.linspace(0.01, 2, 31)
    frame = 0
    actions = actions_creator()
    n_actions = len(actions)

    # NOTE: act_mat is shared across all masses; it is not reset per mass.
    act_mat = 1000 * np.ones([n_actions, n_actions])

    # training_error[m, i] = (step index, loss reported by lnr.learner)
    training_error = np.zeros([len(mass), num_of_iterations, 2])
    mass_std = np.zeros(len(mass))

    for m in range(len(mass)):
        actions_count = np.zeros(n_actions)
        synapse_0, synapse_1, synapse_2, synapse_3 = lnr.initialize_synapses(
            hls, input_size)
        for i in range(num_of_iterations):
            training_error[m, i, 0] = i
            if i == 0:
                # Each mass starts from a random action.
                current_action = np.random.randint(n_actions)
                prev_action = current_action
            action = actions[current_action]
            actions_count[current_action] += 1
            (l4_error, training_error[m, i, 1],
             synapse_0, synapse_1, synapse_2, synapse_3) = lnr.learner(
                synapse_0, synapse_1, synapse_2, synapse_3, action, mass[m])
            act_mat, next_action = critic_actor_v1(
                prev_action, current_action, act_mat, l4_error)
            prev_action = current_action
            current_action = next_action
        mass_std[m] = np.std(actions_count)

    plt.figure('errors')
    colormap = plt.cm.gist_ncar
    colors = [colormap(i) for i in np.linspace(0, 0.99, 10)]
    axes = plt.gca()
    # set_color_cycle no longer exists in current Matplotlib; prefer
    # set_prop_cycle and fall back only on releases that predate it.
    if hasattr(axes, 'set_prop_cycle'):
        axes.set_prop_cycle(color=colors)
    else:
        axes.set_color_cycle(colors)

    for m in range(len(mass)):
        if m % 15 == 0:
            pl = 'm = %.2fkg' % (mass[m])
            plt.plot(training_error[m, :, 0], training_error[m, :, 1],
                     label=pl)
            plt.xlabel('Steps')
            plt.ylabel('Loss function')
            plt.ylim([0, 40])
            image_names = 'graphs'
            save = False  # flip to True to dump numbered frames
            if save:
                for t in range(10):
                    # str.zfill works on Python 2 and 3; string.zfill was
                    # removed in Python 3.
                    plt.savefig('./figs1/' + image_names +
                                str(frame).zfill(5) + '.png', format='png')
                    frame += 1
    plt.legend()

    plt.figure('std vs. mass')
    plt.scatter(mass, mass_std)
    plt.show()
curr_testData['classLabel'] = classDummies[col].loc[ curr_testData.index] data1 = machine_learning.ActiveLearningDataset(curr_labeledData, classLabel="classLabel", origText="origText") data2 = machine_learning.ActiveLearningDataset(curr_unlabeledData, classLabel="classLabel", origText="origText") data3 = machine_learning.ActiveLearningDataset(curr_testData, classLabel="classLabel", origText="origText") #Create learner, with labeled dataset as initial training active_learner = learner.learner(data1, test_datasets=data3, NBC=False, className=classDefinitions[col]) active_learner.load() classifiers[col] = active_learner #Confirm about to start new classifier: while True: var = raw_input( 'Would you like to continue building a classifier for class %s? \nY or N? \nAnswer:' % classDefinitions[col]) if var not in ('Y', 'N'): print 'Choose either Y or N' continue else: break if var == 'N': continue
__author__ = 'davidvinegar'
from learner import learner
import pandas as pd
import util
import matplotlib.pyplot as plt
import datetime as dt
import KNNLearner as knn

# Helper object that computes the indicator data frames used below
l = learner()

# Date range and ticker the features are computed for
dates = pd.date_range('2007-12-31', '2009-12-31')
symbol = "IBM"

# Raw indicators; the Bollinger band value is derived from the volatility
momentumDF = l.getMomentum(dates, symbol)
fiveDayPriceChange = l.getWeekPercentPriceChange(dates, symbol)
volatilityDF = l.getVolatility(dates, symbol)
bollingerBandDf = l.getBollingerBandVAlue(symbol, dates, volatilityDF)

# Stats are taken from the un-normalized frames, before the three
# indicators are normalized
stats = l.getStats(momentumDF, volatilityDF, bollingerBandDf)
bollingerBandDf = l.normalizeDataFrame(bollingerBandDf)
momentumDF = l.normalizeDataFrame(momentumDF)
volatilityDF = l.normalizeDataFrame(volatilityDF)

# Prices for the symbol alone (no SPY column), rows with missing values dropped
unalteredPrices = util.get_data([symbol], dates, addSPY=False).dropna()

# Assemble the training inputs/targets from the indicators
fiveDayPriceChange, trainX, trainY, unalteredPrices = l.prepareTrainXandY(
    bollingerBandDf, fiveDayPriceChange, momentumDF, unalteredPrices,
    volatilityDF, symbol)

#Uncomment the LinRegl and comment the KNN Learner to use that instead of KNN
# learner = lrl.LinRegLearner(verbose = True) # create a LinRegLearner
# NOTE: this rebinds the name `learner`, which until here referred to the
# class imported at the top of the file.
learner = knn.KNNLearner(2, verbose=True) # create a knn learner
data_path: Folder path where the dataset exists algo: required only when using recursive_feature_selector as feature selector feature_selector = feature selector next 3 parameters are for whether to use Normalization, Missing value treatment and if feature selection will be done ++++++++++++++++++++++++++++++++++++ """ dProcessor.dataProcess(file_path, data_path, algo, feature_selector, True, True, False) """ ++++++++++++++++++++++++++++++++++++ cross validation parameter: nFold = Integer value depending on number of fold you want to create ++++++++++++++++++++++++++++++++++++ """ nFold = 10 model = learner.learner(nFold) """ ++++++++++++++++++++++++++++++++++++ Learner to Run: Available options are - 1) Naive Bayes: NB 2) Decision Tree: DT 3) Random Forest: RF 4) Support Vector Machine: SVM 5) Logistic Regression: LR 6) Neural Network: MLP 7) Our Implementation of Neural Network: NN 8) AdaBoost: ADA 9) Tree Based Naive Bayes with depth 2: NBL2 10) Tree Based Naive Bayes with depth 3: NBL3 11) Our Implementation of Naive Bayes: NBO
import poker
import random
import learner

if __name__ == "__main__":
    # Play one round between a random-swapping player and the learner.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    deck = poker.poker()
    deck.shuffle()

    player1 = 'b'
    deck.registerPlayer(player1)
    player2 = 'learnerc'
    deck.registerPlayer(player2)

    hand1 = deck.deal(player1)
    hand2 = deck.deal(player2)
    l = learner.learner(hand2)
    l.learnerid = player2

    print("{}: {}".format(player1, " ".join(hand1)))
    print("{}: {}".format(player2, " ".join(hand2)))

    # Player 1 swaps between 0 and 3 randomly chosen cards.
    select1 = random.randint(0, 3)
    hand1 = deck.swap(player1, random.sample(hand1, select1))
    print("{} is swapping {} random cards.".format(player1, select1))
    print("\t{}".format(" ".join(hand1)))

    # Player 2 swaps whatever the learner picks. Count the picked cards
    # before swapping: the message used to report len() of the *new hand*,
    # not the number of cards actually swapped.
    cards_to_swap = l.chooseswap()
    swap_count = len(cards_to_swap)
    hand2 = deck.swap(player2, cards_to_swap)
    print("{} is swapping {} cards.".format(player2, swap_count))
    print("\t{}".format(" ".join(hand2)))

    winner = deck.resolve()
    print("Winner: {}".format(winner))
    l.recordwin(winner)
def find_neighbor_indices(self, y0):
    """Create ``self.estimator`` for ``y0`` and initialise its structures.

    The estimator is built from ``y0``'s first two dimensions (passed in
    swapped order) and a ``[4, 4]`` setting; its structures are then
    initialised from channel 0 of ``y0``.
    """
    dim0, dim1 = y0.shape[0], y0.shape[1]
    factory = learner((dim1, dim0), [4, 4])
    self.estimator = factory.new_estimator()
    first_channel = y0[:, :, 0]
    self.estimator.init_structures(first_channel)
import matplotlib.pyplot as plt
# numpy is used below but was not imported in this part of the file.
import numpy

from bandit import Bandit
from learner import learner

ARMS = 10
N_BANDITS = 2000
PLAYS = 1000

# pylint: disable=C0103
bandits = [Bandit() for _ in range(N_BANDITS)]

# Average oracle value over all bandits, repeated once per play so it can
# be drawn as a flat reference line.
average_oracle = [
    numpy.average([bandit.oracle_value for bandit in bandits])
] * PLAYS

# Three learner runs on the same bandits: default, eps=0.01 and eps=0.1.
history_value, history_optimal = learner(bandits, PLAYS, 'greedy')
history_value_0_01, history_optimal_0_01 = learner(bandits, PLAYS, 'greedy',
                                                   eps=0.01)
history_value_0_1, history_optimal_0_1 = learner(bandits, PLAYS, 'greedy',
                                                 eps=0.1)

# Recomputed after the runs, as in the original script.
# NOTE(review): redundant unless learner() can change oracle_value -- confirm.
average_oracle = [
    numpy.average([bandit.oracle_value for bandit in bandits])
] * PLAYS

plt.plot(list(range(len(history_value))), average_oracle, label="oracle")
with open(custom) as f: custom_args = yaml.load(f.read()) args.update(custom_args) args['cuda'] = args['cuda'] and torch.cuda.is_available() log_dir = f'results/{datetime.now().strftime("%Y-%m-%d-%H:%M:%S")}-{args["env"]}-{args["prefix"]}' log_dir = MPI.COMM_WORLD.bcast(log_dir, root=0) args['log_dir'] = log_dir if MPI.COMM_WORLD.Get_rank() == 0: os.makedirs(log_dir) with open(log_dir + '/configuration.yaml', 'w') as f: f.write(yaml.dump(args)) args['device'] = torch.device('cuda' if args['cuda'] else 'cpu') return args if __name__ == '__main__': torch.set_num_threads(1) args = parse() set_context(args['num_units'], args['num_actors']) if global_dict['unit_idx'] == args['num_units']: evaluator(args) else: rank = global_dict['rank_local'] if rank == global_dict['rank_learner']: learner(args) elif rank == global_dict['rank_replay']: replay(args) else: actor_id = rank actor(args, actor_id)
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 2 11:51:43 2018

@author: suvod
"""
import os
import pandas as pd
import dataProcessor
import learner

if __name__ == "__main__":
    # The data set lives in <current working directory>/data.
    data_loc, dataSet = 'data', "songDataSet.csv"
    cwd = os.getcwd()
    data_path = os.path.join(cwd, data_loc)
    file_path = os.path.join(data_path, dataSet)

    # Pre-process the raw CSV; the three flags are passed positionally.
    dProcessor = dataProcessor.dataProcessor()
    dProcessor.dataProcess(file_path, data_path, True, True, False)

    # Train the 'DT' learner.
    model = learner.learner()
    model.train('DT')
#Make copies of the datasets curr_labeledData = labeledData.copy() curr_unlabeledData = unlabeledData.copy() curr_testData = testData.copy() #Overwrite the old classLabel with binary class labels curr_labeledData['classLabel'] = classDummies[col].loc[curr_labeledData.index] curr_unlabeledData['classLabel'] = classDummies[col].loc[curr_unlabeledData.index] curr_testData['classLabel'] = classDummies[col].loc[curr_testData.index] data1 = machine_learning.ActiveLearningDataset(curr_labeledData,classLabel="classLabel",origText="origText") data2 = machine_learning.ActiveLearningDataset(curr_unlabeledData,classLabel="classLabel",origText="origText") data3 = machine_learning.ActiveLearningDataset(curr_testData,classLabel="classLabel",origText="origText") #Create learner, with labeled dataset as initial training active_learner = learner.learner(data1,test_datasets=data3,NBC=False,className = classDefinitions[col]) active_learner.load() classifiers[col] = active_learner #Confirm about to start new classifier: while True: var = raw_input('Would you like to continue building a classifier for class %s? \nY or N? \nAnswer:' % classDefinitions[col]) if var not in ('Y','N'): print 'Choose either Y or N' continue else: break if var == 'N': continue length = len(data1.data) active_learner.pick_initial_training_set(length) active_learner.rebuild_models(undersample_first=True)
from db import spartandb
from PyDictionary import PyDictionary
from learner import learner

# --- Subjects: every distinct comma-separated keyword, lower-cased -------
# The context manager closes the file even if reading fails.
with open("subjects.txt", "r") as read_file:
    keywords = list(set(read_file.read().split(",")))

dbclient = spartandb()
dictionary = PyDictionary()
for keyword in keywords:
    dbclient.insert_subject(keyword.lower())

# --- Objects: keywords plus the synonyms of each of their words ----------
# (This file handle used to be left open.)
with open("objects.txt", "r") as read_file:
    keywords = list(set(read_file.read().split(",")))

# NOTE(review): client and dictionary are re-created as in the original
# script; this looks redundant -- confirm before removing.
dbclient = spartandb()
dictionary = PyDictionary()
for keyword in keywords:
    dbclient.insert_object(keyword.lower())
    for key in keyword.split(" "):
        # Look the synonyms up once per word instead of twice.
        synonyms = dictionary.synonym(key)
        if synonyms is not None:
            for synonym in synonyms:
                dbclient.insert_object(synonym)

learner_mod = learner()
learner_mod.read()
def main():
    """Command-line driver for the active-learning experiments.

    Usage: ``<script> [input_text [input_method]]``.

    ``input_text`` selects a section: ``tradeoff``, ``check``, ``plot``,
    ``preprocess``, ``min_max_l2distance``, ``generate_seed`` or
    ``changing_seed``. ``input_method`` selects the experiment inside the
    ``tradeoff``, ``check`` and ``plot`` sections. Without arguments the
    ``generate_seed`` section runs with method ``random``.

    Selection is by exact name. Earlier revisions tested
    ``name in 'literal'``, which is a substring test: ``minmax`` also
    matched ``minmaxvar`` (and was silently replaced by it), and an empty
    argument matched every section.

    Raises:
        IndexError: ``tradeoff``/``check`` given without a method.
        ValueError: unknown method in the ``tradeoff`` section.
    """
    filename = 'Eur-Lex'
    path = '../' + filename + '/' + filename

    if len(sys.argv) > 1:
        input_text = sys.argv[1]
        if input_text in ['tradeoff', 'check']:
            input_method = sys.argv[2]  # mandatory for these sections
        else:
            # Optional elsewhere. Previously left unbound, which made the
            # 'plot' section fail with NameError.
            input_method = sys.argv[2] if len(sys.argv) > 2 else None
    else:
        input_text = 'generate_seed'
        input_method = 'random'

    # **********************
    # ***Tradeoff Section***
    # **********************
    if input_text == 'tradeoff':
        seed_file_idx = ['0']  # [str(i) for i in range(5)]
        seed_files = [path + '.seed.' + idx for idx in seed_file_idx]
        max_iter = 2

        if input_method == 'max_query_vary_delta':
            set_of_delta = list(np.arange(.1, .2, .01))
            for fseed in seed_files:
                for delta in set_of_delta:
                    active_learner_general(
                        max_query_active_learner(path, delta=delta),
                        fseed,
                        fsave_ext='.delta_' + str(delta),
                        max_iter=max_iter)
            return

        if input_method == 'minmax':
            al = min_max_inner_prod(path)
        elif input_method == 'minsum':
            al = min_sum_inner_prod(path)
        elif input_method == 'minmin':
            al = min_min_inner_prod(path)
        elif input_method == 'maxquery':
            delta = .1
            save_fig_interval = 50
            al = max_query_active_learner(
                path, delta=delta, save_fig_interval=save_fig_interval)
        elif input_method == 'minmaxvar':
            al = min_max_variance_active_learner(path, count=5)
        elif input_method == 'random':
            al = random_active_learner(path)
        else:
            # Used to surface as a NameError on the unbound `al`.
            raise ValueError('unknown tradeoff method: %r' % (input_method,))
        run_tradeoff_exp(al, seed_files, max_iter=max_iter)

    # **********************************************************
    # ****************CHECK*************************************
    # **********************************************************
    if input_text == 'check':
        if input_method == 'tuning':
            l = learner(path)
            l.tune()

        if input_method == 'popular_n_rare':
            l = learner(' ')
            acc = l.train_test()
            with open('acc', 'a') as fp:
                fp.write(' '.join([str(acc_val) for acc_val in acc]) + '\n')
            # NOTE(review): the collapsed source does not show which block
            # this early return belongs to; it is kept in this branch.
            return

        if input_method == 'min_max_l2distance':
            check_min_max_dist_outer(path)

        if input_method == 'ball_tree':
            X, Y, Q = generate_sparse_data(
                nsamples=8, nlabels=2, nfeatures=2, density=.75)
            al = min_max_l2distance_lower(path, leaf_size=2)
            al.Xtrain = X
            al.Ytrain = Y
            al.Q = Q
            al.create_ball_tree()
            al.brute_force_l2_norm(X, np.array(range(X.shape[0])))
            print('************\n\ndoing active select\n\n *******************')
            al.active_select()

        if input_method == 'inner_prod':  # checking is left
            k = 1
            l = learner(path)
            l.read_metadata()
            # change ftrain to ftrain_src
            l.ftrain = l.ftrain_src
            # run train and test: create Q, create out file, decide a rank
            l.train_test(k=k, acc_method='popular_n_rare')
            # read test file
            l.Ytest, l.Xtest = l.read_sparse_data(l.ftest, l.nfeatures)
            # compute out file values
            pred_computed = l.compute_k_scores(l.Xtest, l.Q, k=k)
            # keep the first (label, score) pair of every sample
            Ytest_comp = []
            score_comp = []
            for sample_score in pred_computed:
                Ytest_comp.append(sample_score[0][0])
                score_comp.append(sample_score[0][1])
            # compare with the values read back from the out file
            Ytest_pred, score_pred = l.read_out_file()
            for label1, label2, score1, score2 in zip(
                    Ytest_comp, Ytest_pred, score_comp, score_pred):
                print('label1:' + str(label1) + ',label2:' + str(label2) +
                      ',score1:' + str(score1) + ',score2:' + str(score2) +
                      '\n')

    # **********************************************************
    # ****************PLOT**************************************
    # **********************************************************
    if input_text == 'plot':
        if input_method == 'active_score_per_iter':
            path = '../results/exp7rep/bibtex.maxquery.count.'
            nfiles = 40
            plot_active_score_per_iter(path, nfiles)

        if input_method == 'samples per labels':
            l = learner(path)
            l.read_metadata()
            Ytrain = l.read_sparse_data(l.ftrain_src, l.nfeatures)[0]
            l.find_samples_per_label(l.nlabels, Ytrain)

        if input_method == 'single_seed':
            plot_single_seed()

        if input_method == 'multiple_seed':
            seed_set = [str(i) for i in range(5)]
            path = '../results/testing/bibtex.acc.'
            method_list = ['minmaxdev', 'random']
            dp = data_process()
            dp.readfiles_outer(path, method_list, seed_set,
                               num_of_acc=2, fsave=path)  # modify

        if input_method == 'ad-hoc':
            path = '../results/exp5/e/bibtex.acc.'
            method_list = ['maxquery', 'random']
            file_list = [path + m for m in method_list]
            plot_your_choice(2, file_list, method_list)

    # **********************************************************
    # ****************OTHER*************************************
    # **********************************************************
    if input_text == 'preprocess':
        l = learner(path)
        list_of_files = [path + ext for ext in ['.train', '.test', '.heldout']]
        l.read_metadata()
        l.read_normalize_write_sparse_files(list_of_files, l.nfeatures)

    if input_text == 'min_max_l2distance':
        print('minmax l2 distance')
        leaf_size = 20
        bound = 'lower'
        minmaxl2 = min_max_l2distance(path, leaf_size, bound)
        fseed = path + '.seed.0'
        active_learner_general(minmaxl2, fseed)

    if input_text == 'generate_seed':
        start_idx = 0
        nseed_required = 1  # 10
        gen_seed = data_process()
        frac_seed = .1
        ftrain = path + '.train.norm'
        for i in range(nseed_required):
            fseed = path + '.seed.' + str(i + start_idx)
            # The seeds are computed but not written: the code that wrote
            # them to `fseed` was disabled in the original.
            seeds = gen_seed.get_seed(frac_seed, ftrain)

    if input_text == 'changing_seed':  # incomplete
        # NOTE(review): unfinished in the original and kept verbatim. The
        # zero step makes range() raise ValueError, and frac_seed, ftrain
        # and fseed are not defined on this path.
        frac_l = 0
        frac_u = 0
        frac_diff = 0
        gen_seed = data_process()
        for frac in range(frac_l, frac_u, frac_diff):
            seeds = gen_seed.get_seed(frac_seed, ftrain)
            with open(fseed, 'w') as fp:
                fp.write(' '.join(str(s) for s in seeds))
def find_neighbor_indices(self, y0):
    """Build the neighbour estimator for ``y0`` and store it on the instance.

    ``self.estimator`` is created from the first two dimensions of ``y0``
    (given in swapped order) together with a ``[4, 4]`` setting, and its
    structures are initialised from channel 0 of ``y0``.
    """
    swapped_dims = (y0.shape[1], y0.shape[0])
    builder = learner(swapped_dims, [4, 4])
    self.estimator = builder.new_estimator()
    channel_zero = y0[:, :, 0]
    self.estimator.init_structures(channel_zero)