def __init__(self, ids=["oxfess"], file="posts.csv", depth=5, delay=2): self.ids = ids self.out_file = file self.depth = depth + 1 self.delay = delay # browser instance self.browser = webdriver.Chrome(executable_path=GECKODRIVER) utils.create_csv(self.out_file)
def greedy_learn(steps=5, seed=None, splits=10, save=True,
                 savepath='submissions/greedy.csv', failed_runs=10000):
    rg = np.random.RandomState(seed)
    toposort = list(rg.permutation(22))
    extra_edges = set()
    nodes = 23
    # Initial edges: naive Bayes (class node 22 points to every feature)
    edges = [(22, i) for i in range(22)]
    train = get_train()
    # Score of the naive Bayes baseline
    max_score = get_kfold_accuracy(BayesN(nodes=nodes, edges=edges), train, splits)
    st = 0
    f = 0
    while st < steps and f < failed_runs:
        while True:
            # Select a candidate edge between two distinct feature nodes
            u = rg.randint(22)
            v = rg.randint(22)
            if u == v or (u, v) in edges or (v, u) in edges:
                continue
            # Orient the edge to respect the fixed topological order
            cand_edge = (u, v) if toposort.index(u) < toposort.index(v) else (v, u)
            break
        edges.append(cand_edge)
        # Score the network with the new edge
        score = get_kfold_accuracy(BayesN(nodes=nodes, edges=edges), train, splits)
        # Keep the edge only if it improved performance
        if score > max_score:
            extra_edges.add(cand_edge)
            max_score = score
            st += 1
            print('Step', st, ':', max_score, 'Extra Edges:', extra_edges)
        else:
            edges.pop()
            f += 1
    print('Max Score:', max_score)
    print('Extra Edges:', extra_edges)
    # Build the final model unconditionally so it can be returned even
    # when save=False
    model = BayesN(nodes=nodes, edges=edges)
    model.fit(train)
    if save:
        test_data = read_csv(utils.TEST_PATH)[:, 1:]
        y_pred = model.predict(test_data)
        create_csv(y_pred, savepath)
    return model
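# Both greedy learners in this excerpt call get_kfold_accuracy, which is not
# shown here. Below is a minimal sketch of such a helper, assuming fit() takes
# the full training array, predict() takes features only, and labels sit in
# column 0 (matching the NBayes script further down); the project's actual
# implementation may differ.
from sklearn.model_selection import KFold
import numpy as np

def get_kfold_accuracy(model, train, splits):
    # Mean k-fold validation accuracy of `model` on `train`
    accs = []
    kf = KFold(n_splits=splits, shuffle=True, random_state=0)
    for tr_idx, va_idx in kf.split(train):
        model.fit(train[tr_idx])
        y_pred = model.predict(train[va_idx][:, 1:])
        accs.append(np.mean(y_pred == train[va_idx][:, 0]))
    return float(np.mean(accs))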
def __init__(self, ids=["oxfess"], file="posts.csv", depth=5, delay=2): self.ids = ids self.out_file = file self.depth = depth + 1 self.delay = delay # browser instance self.browser = webdriver.Firefox( executable_path=GECKODRIVER, firefox_binary=FIREFOX_BINARY, firefox_profile=PROFILE, ) utils.create_csv(self.out_file)
def gen_training_accuracy(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    ckpt_paths = [int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"]
    ckpt_paths = np.sort(ckpt_paths)

    # csv
    headers = ["epoch", "acc_train", "acc_test"]
    csv_path = utils.create_csv(args.model_dir, 'accuracy.csv', headers)

    for epoch, ckpt_path in enumerate(ckpt_paths):
        if epoch % 5 != 0:
            continue
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)

        # load data
        train_transforms = tf.load_transforms('test')
        trainset = tf.load_trainset(params['data'], train_transforms, train=True)
        trainloader = DataLoader(trainset, batch_size=500, num_workers=4)
        train_features, train_labels = tf.get_features(net, trainloader, verbose=False)

        test_transforms = tf.load_transforms('test')
        testset = tf.load_trainset(params['data'], test_transforms, train=False)
        testloader = DataLoader(testset, batch_size=500, num_workers=4)
        test_features, test_labels = tf.get_features(net, testloader, verbose=False)

        acc_train, acc_test = svm(args, train_features, train_labels,
                                  test_features, test_labels)
        utils.save_state(args.model_dir, epoch, acc_train, acc_test,
                         filename='accuracy.csv')
    print("Finished generating accuracy.")
def gen_testloss(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    ckpt_paths = [int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"]
    ckpt_paths = np.sort(ckpt_paths)

    # csv
    headers = ["epoch", "step", "loss", "discrimn_loss_e", "compress_loss_e",
               "discrimn_loss_t", "compress_loss_t"]
    csv_path = utils.create_csv(args.model_dir, 'losses_test.csv', headers)
    print('writing to:', csv_path)

    # load data
    test_transforms = tf.load_transforms('test')
    testset = tf.load_trainset(params['data'], test_transforms, train=False)
    testloader = DataLoader(testset, batch_size=params['bs'], shuffle=False,
                            num_workers=4)

    # save loss
    criterion = MaximalCodingRateReduction(gam1=params['gam1'],
                                           gam2=params['gam2'],
                                           eps=params['eps'])
    for epoch, ckpt_path in enumerate(ckpt_paths):
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)
        for step, (batch_imgs, batch_lbls) in enumerate(testloader):
            features = net(batch_imgs.cuda())
            # num_classes is already a count, so it is passed directly
            loss, loss_empi, loss_theo = criterion(features, batch_lbls,
                                                   num_classes=testset.num_classes)
            utils.save_state(args.model_dir, epoch, step, loss.item(),
                             *loss_empi, *loss_theo, filename='losses_test.csv')
    print("Finished generating test loss.")
        return ans

    def predict(self, features):
        y_pred = []
        for i in range(features.shape[0]):
            # Log-probability of each class for this sample
            probabs = [self.infer_logprobab(features[i], y) for y in self.classes]
            y = self.classes[np.argmax(np.array(probabs))]
            y_pred.append(y)
        return np.array(y_pred)


if __name__ == "__main__":
    from utils import read_csv, create_csv
    import utils

    laplacians = [1, 0.5, 0.1, 2]
    for l in laplacians:
        model = NBayes(l)
        train_data = read_csv(utils.TRAIN_PATH)
        x_train = train_data[:, 1:]
        y_train = train_data[:, 0]
        model.fit(x_train, y_train)
        test_data = read_csv(utils.TEST_PATH)[:, 1:]
        y_pred = model.predict(test_data)
        create_csv(y_pred, 'submissions/NBayes_' + str(l) + '.csv')
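# The create_csv(y_pred, path) submission helper used above is not shown in
# this excerpt. A minimal sketch, assuming a two-column submission format; the
# "Id" and "Category" header names are an assumption, not the project's actual
# ones.
import csv
import os

def create_csv(y_pred, path):
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["Id", "Category"])
        for i, y in enumerate(y_pred):
            writer.writerow([i, y])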
if verb_print:
    print('\033[94m' + df.columns[feature_1] + '\033[0m', end='')
    print(' vs ', end='')
    print('\033[96m' + df.columns[feature_2] + '\033[0m')
    print('Accuracy: ', end='')
    if ac > min_accuracy:
        print('\033[92m', end='')
    else:
        print('\033[91m', end='')
    print(str(ac) + '\033[0m\n')
if verb_standardize and i == 0:
    utils.show_standardize(x, y, house[i], df, feature_1, feature_2, theta)
if verb_cost and i == 0:
    utils.show_cost(history_err)
if ac >= min_accuracy:
    row_list.append([
        house[i], df.columns[feature_1], df.columns[feature_2],
        theta[0], theta[1], theta[2],
        mean[0], mean[1], std[0], std[1], ac
    ])

utils.create_csv(row_list, 'weights.csv')
# row_list[0] is the header row, so average the accuracies of rows 1..N
global_acc = 0
for row in row_list[1:]:
    global_acc += row[10]
print('\033[92m' + 'Average Accuracy: ' + '\033[0m'
      + str(global_acc / (len(row_list) - 1)))
def __init__(self, name, start_date=None, end_date=None, headers=None,
             response=None, delay=DELAY, log=None, proxies=None,
             bot_last_status=None, requests=requests, *args, **kwargs):
    # The name of the package; used to resolve resources from inside the
    # package or the folder that contains the module.
    self.name = name

    # Start and end dates give the parser a set time window within which the
    # bot will parse the data in the pages. They also determine the names of
    # the csv and log files where the respective information will go.
    self.start_date = start_date
    self.end_date = end_date

    # These should be defined by the user, who will have already seen the
    # page and will have a clear picture of the headers that belong in the
    # csv file.
    # self.headers = UserAgentRotator().generate_header()
    self.headers = {
        "Connection": "close",
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; "
                      "WOW64; Trident/6.0)",
    }

    # The class used for response objects; in the present scenario, a
    # response object of the requests library.
    self.response = response

    # The base filename that determines the names of the generated files.
    filename = self.name

    # Set up the log handler; the log file name is based on the start date
    # and the bot name given by the user. If the logging flag is set to
    # "stdout", no log file is initialised and logging goes to stdout.
    log_date = convert_to_filenameable(self.start_date)
    self.log = log
    filename = "{filename}_{log_date}".format(filename=filename,
                                              log_date=log_date)
    if scraper_logging == "stdout":
        log_file = None
    elif scraper_logging == "file":
        log_file = "log/{filename}.log".format(filename=filename)
    else:
        raise ValueError("please set correct logging flag for "
                         "scraper_logging")
    if log_date:
        self.log = setup_logging(log_file)
    else:
        raise ValueError("cannot create log file, "
                         "log date is not resolved. "
                         "log_date: {log_date}".format(log_date=log_date))

    # A delay of `DELAY` seconds is applied to the bot; 1 second by default.
    # Change it via the delay parameter.
    self.delay = delay

    # An instance has a default proxy, fixed for the particular instance,
    # that acts as a random agent for the bot.
    if not proxies:
        self.proxies = None  # ProxyRotator().proxies
    else:
        self.proxies = proxies

    # Create a csv file if it does not exist; also check whether a csv file
    # already exists.
    self.csvfile = create_csv(filename)

    # Keeps track of the last status of the bot; a dictionary.
    self.bot_last_status = bot_last_status

    # Dependency injection for requests.
    self.requests = requests
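# setup_logging is called above but not defined in this excerpt. A minimal
# sketch, assuming it returns a configured logging.Logger that writes to
# log_file when one is given and to stdout otherwise; the project's actual
# helper may differ.
import logging
import sys

def setup_logging(log_file=None):
    logger = logging.getLogger("scraper")
    logger.setLevel(logging.INFO)
    if log_file:
        handler = logging.FileHandler(log_file)
    else:
        handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
    logger.addHandler(handler)
    return logger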
def greedy_learn2(steps=5, seed=None, splits=10, save=True,
                  savepath='submissions/greedy.csv', failed_runs=10000):
    rg = np.random.RandomState(seed)
    g = nx.DiGraph()
    for i in range(22):
        g.add_edge(22, i)
    nodes = 23
    # Initialise edges as naive Bayes
    edges = {(22, i) for i in range(22)}
    train = get_train()
    # Score of the naive Bayes baseline
    max_score = get_kfold_accuracy(BayesN(nodes=nodes, edges=edges), train, splits)
    print('Step 0:', max_score)
    st = 0
    f = 0
    while st < steps and f < failed_runs:
        # Pick one of three moves: add, delete, or reverse an edge
        r = rg.randint(3)
        gr = copy.deepcopy(g)
        ed = copy.deepcopy(edges)
        if r == 0:
            # Add a random edge
            while True:
                u = rg.randint(22)
                v = rg.randint(22)
                if u == v or (u, v) in g.edges:
                    continue
                g.add_edge(u, v)
                # Reject if g is not a DAG
                if not nx.is_directed_acyclic_graph(g):
                    g.remove_edge(u, v)
                    continue
                break
            edges.add((u, v))
        elif r == 1:
            # Delete a random edge
            if len(edges) <= 3:
                continue
            del_edge = random.choice(tuple(edges))
            edges.remove(del_edge)
            g.remove_edge(*del_edge)
        else:
            # Reverse the orientation of a random edge; update the edge set
            # only when a reversal actually keeps the graph acyclic
            if len(edges) <= 3:
                continue
            for i in range(len(edges)):
                act_edge = random.choice(tuple(edges))
                rev_edge = (act_edge[1], act_edge[0])
                g.remove_edge(*act_edge)
                g.add_edge(*rev_edge)
                if nx.is_directed_acyclic_graph(g):
                    edges.remove(act_edge)
                    edges.add(rev_edge)
                    break
                g.remove_edge(*rev_edge)
                g.add_edge(*act_edge)
        # Score the new network
        score = get_kfold_accuracy(BayesN(nodes=nodes, edges=edges), train, splits)
        if score > max_score:
            max_score = score
            st += 1
            print('Step', st, ':', max_score, 'Edges:', edges)
        else:
            edges = ed
            g = gr
            f += 1
    print('Max Score:', max_score)
    print('Edges:', edges)
    # Build the final model unconditionally so it can be returned even
    # when save=False
    model = BayesN(nodes=nodes, edges=edges)
    model.fit(train)
    if save:
        test_data = read_csv(utils.TEST_PATH)[:, 1:]
        y_pred = model.predict(test_data)
        create_csv(y_pred, savepath)
    return model
import argparse
from datetime import datetime
import math

from utils import parse_xls, create_csv

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Find distance")
    parser.add_argument("-i", "--input",
                        help="Input file (default: ./files/test.xlsx)",
                        default="./files/test.xlsx")
    parser.add_argument("-s", "--slice",
                        help="Slice number of final list (default: None)",
                        default=None)
    args = parser.parse_args()

    # args.slice defaults to None; only cast when a value was given
    slice_num = int(args.slice) if args.slice is not None else None
    places = parse_xls(args.input, slice_num)
    create_csv(places)
results = {}
image_filenames = get_files_in_directory(f"{dataset_dir}/test")

# make a square image while keeping aspect ratio, filling with fill_color
def make_square(im, min_size=256, fill_color=(0, 0, 0, 0)):
    x, y = im.size
    size = max(min_size, x, y)
    new_im = Image.new('RGB', (size, size), fill_color)
    new_im.paste(im, (int((size - x) / 2), int((size - y) / 2)))
    return new_im

for image_name in image_filenames:
    img = Image.open(f"{dataset_dir}/test/{image_name}").convert('RGB')
    # NN input is 256x256
    img = img.resize((256, 256))
    img_array = np.expand_dims(np.array(img), 0)
    # Normalize
    img_array = img_array / 255.
    # Get prediction
    softmax = model.predict(x=img_array)
    # Get predicted class (index with max value)
    prediction = tf.argmax(softmax, 1)
    # Get tensor's value
    prediction = tf.keras.backend.get_value(prediction)[0]
    results[image_name] = prediction

create_csv(results, MODEL_NAME)
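# Note: make_square is defined above but never called; the loop resizes
# directly, which distorts non-square images. If the model was trained on
# padded inputs (an assumption, not something this script confirms), the two
# resize lines in the loop could instead read:
img = make_square(img, fill_color=(0, 0, 0))  # explicit RGB fill; the default
                                              # (0, 0, 0, 0) is an RGBA tuple,
                                              # while Image.new uses 'RGB' mode
img = img.resize((256, 256))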
          'python3.9 logreg_predict.py dataset_test.csv weights.csv')
    sys.exit()

df = pd.read_csv(sys.argv[1])
weights = pd.read_csv(sys.argv[2])
show_chart = True
student_results = []
row_list = [["Index", "Hogwarts House"]]
for i in range(len(df)):
    tmp = predict_house(df.loc[i], weights.to_numpy())
    student_results.append(tmp)
    row_list.append([i, tmp])
if contain_value_from_train(df):
    accuracy = get_accuracy(df["Hogwarts House"].tolist(), student_results)
    print("Accuracy: ", end='')
    if accuracy >= 0.98:
        print('\033[92m', end='')
    else:
        print('\033[91m', end='')
    print(accuracy, '\033[0m')
    if show_chart:
        utils.show_repartition(student_results, df["Hogwarts House"].tolist())
else:
    print('\033[93m'
          + '⚠ Houses are missing, please provide a csv with them '
          + 'if you want to see Accuracy.'
          + '\033[0m')
utils.create_csv(row_list, "houses.csv")
def print_generations_makespan(self):
    print("POPULATION MAKESPAN\n")
    self.populations_makespan[1] = self.populations_makespan[1][-1]
    print(self.populations_makespan)
    create_csv(self.num_jobs, self.num_machines, self.pop, self.iteration,
               self.populations_makespan, self.instance_name)