def setup_nodes(net: mininet.net.Mininet, configs):
    # hardcode some stuff here
    tcp_port1 = 9998
    tcp_port2 = 9999 if configs.h2 else None
    h1 = net.get("h1")
    h3 = net.get("h3")
    # Pass the host name as a string, matching the "h2" call below and check_output().
    h1_result = get_filename("h1", configs)
    h2_result = get_filename("h2", configs)
    # Remove the result files if they already exist.
    for filename in {h1_result, h2_result}:
        if os.path.exists(filename):
            os.remove(filename)
    h3_proc = multiprocessing.Process(
        target=setup_mininet_iperf_server,
        args=(h3, tcp_port1, tcp_port2, configs))
    h1_proc = multiprocessing.Process(
        target=setup_client,
        args=(h1, h3, configs, tcp_port1, h1_result))
    if configs.h2:
        h2 = net.get("h2")
        h2_proc = multiprocessing.Process(
            target=setup_client,
            args=(h2, h3, configs, tcp_port2, h2_result, configs.h2_cc))
    else:
        h2_proc = None
    h3_proc.start()
    time.sleep(2 if configs.h2 else 0.5)
    h1_proc.start()
    if configs.h2:
        h2_proc.start()
    # Only return processes that were actually created, so callers can join them safely.
    processes = [proc for proc in (h3_proc, h1_proc, h2_proc) if proc is not None]
    return processes
def save_product(product_info, imgs_no_downloand):
    from models import GameImage, PricesGame, Game
    from django.db.models import Q
    import requests

    prices = None
    imgs_downloand = None
    game = None
    try:
        if 'gift' not in product_info:
            product_info['gift'] = None
        if 'stock' not in product_info:
            from product import STOCK_CHOICE
            product_info['stock'] = STOCK_CHOICE.get('reserva')
        if 'pegi' not in product_info:
            product_info['pegi'] = None
        if not Game.objects.filter(Q(name=product_info['title'])
                                   & Q(plataform=product_info['platform'])).exists():
            imgs_downloand = []
            if imgs_no_downloand:
                for img in imgs_no_downloand:
                    filename = get_filename(img)
                    request_imagen = requests.get(img)
                    if request_imagen.status_code == 200:
                        image = GameImage(name=product_info['title'])
                        image.save_image(filename, request_imagen.content)
                        imgs_downloand.append(image)
            product_info['imagenes'] = imgs_downloand if imgs_downloand else None
            img = product_info['src']
            filename = get_filename(img)
            request = requests.get(img)
            if request.status_code == 200:
                image = GameImage(name=product_info['title'][:15])
                image.save_image("main." + filename.split('.')[1], request.content)
                product_info['imagen'] = image
            else:
                product_info['imagen'] = None
            prices = PricesGame()
            prices.add_price(product_info)
            product_info['prices'] = prices
            game = Game()
            game.add_game(product_info)
        else:
            prices = PricesGame()
            prices.add_price(product_info)
            product_info['prices'] = prices
            game = Game.objects.get(Q(name=product_info['title'])
                                    & Q(plataform=product_info['platform']))
            game.prices.add(prices)
    except Exception:
        # Roll back anything that was already persisted.
        if prices:
            prices.delete()
        if product_info.get('imagen'):
            product_info['imagen'].delete()
        if imgs_downloand:
            for imagen in imgs_downloand:
                imagen.delete()
        if game:
            game.delete()
def check_output(configs):
    # check that the output files were generated properly
    h1_result = get_filename("h1", configs)
    h2_result = get_filename("h2", configs)
    results = [h1_result]
    if configs.h2:
        results.append(h2_result)
    for filename in results:
        get_iperf_metrics(filename)
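# Hypothetical usage sketch (not part of the original code): one way setup_nodes() and
# check_output() might be combined by a caller. The wrapper function, its name, and the
# join/verify flow are assumptions for illustration only; `net` and `configs` come from
# the surrounding Mininet experiment setup.
def run_experiment(net, configs):
    processes = setup_nodes(net, configs)
    for proc in processes:
        proc.join()  # wait for the iperf server and client processes to finish
    check_output(configs)  # verify that the result files were produced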
def main():
    with open(get_filename()) as file:
        inp = sorted([int(line.strip()) for line in file])
    inp = [0, *inp, inp[-1] + 3]
    print(part1(inp))
    print(part2(inp))
def main():
    with open(get_filename()) as file:
        inp = [(line[0], int(line[1:])) for line in (line.strip() for line in file)]
    print("PART 1:", solve(inp, 1 + 0j, "a"))
    print("PART 2:", solve(inp, 10 + 1j, "b"))
def mapper(self, _, page):
    with open(os.path.join(data_path, 'links', get_filename(page)),
              mode='r', encoding='utf-8') as f:
        links = sum(1 for _ in f)
    yield 'degree', links
    yield 'max', links
    yield 'min', links
def main():
    args = parse_args()
    threshold1 = args.Tlow
    threshold2 = 2 * args.Tlow  # Canny recommends a ratio of 1:2
    win1 = args.win1
    win2 = args.win2
    imgsdir = args.imgsdir
    if not os.path.isdir(imgsdir):
        imgpaths = [imgsdir]
    else:
        imgpaths = util.get_imgpaths(imgsdir, n=args.n)
    for i, imgpath in enumerate(imgpaths):
        print("({0}/{1}): Image={2}".format(i + 1, len(imgpaths), imgpath))
        I = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_GRAYSCALE)
        line1, line2 = detect_lanes(I,
                                    threshold1=threshold1,
                                    threshold2=threshold2,
                                    apertureSize=args.ksize)
        if line1 is None and line2 is None:
            print("    Error: Couldn't find lanes.")
            continue
        if line1 is None:
            print("    Error: Couldn't find left lane.")
        if line2 is None:
            print("    Error: Couldn't find right lane.")
        # Irgb = plot_lines(I, line1, line2)
        Irgb = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_COLOR)
        Irgb = util_camera.draw_line(Irgb, line1, (255, 0, 0))
        Irgb = util_camera.draw_line(Irgb, line2, (0, 255, 0))
        # Draw subwindows on image
        Irgb = draw_subwindow(Irgb, win1, colour=(255, 0, 0))
        Irgb = draw_subwindow(Irgb, win2, colour=(0, 255, 0))
        cv2.imwrite('{0}_lines.png'.format(util.get_filename(imgpath)), Irgb)
        print("    LeftLane: {0} RightLane: {1}".format(line1, line2))
    print("Done.")
def main():
    with open(get_filename()) as file:
        inp = [int(line.strip()) for line in file]
    invalid_num = part1(inp, 25)
    print(invalid_num)
    print(part2(inp, invalid_num))
def main():
    with open(get_filename()) as file:
        inp = [line.strip() for line in file]
    print(slide(inp, 3, 1))
    print(
        slide(inp, 1, 1)
        * slide(inp, 3, 1)
        * slide(inp, 5, 1)
        * slide(inp, 7, 1)
        * slide(inp, 1, 2))
def delete_redirect_files(redirects: dict):
    '''
    Remove all files only containing redirects
    '''
    for redirect in redirects.keys():
        filename = os.path.join(root, links_path, get_filename(redirect))
        if os.path.exists(filename):
            os.remove(filename)
def main():
    with open(get_filename()) as file:
        inp = file.readlines()
    entries = [
        int(entry) if entry != "x" else None
        for entry in inp[1].split(",")
    ]
    print("PART 1:", part1(int(inp[0]), entries))
    print("PART 2:", part2(entries))
def main():
    with open(get_filename()) as file:
        content = file.readlines()
    inp = [
        Node.from_string(line.strip().replace(" ", ""))
        for line in content
    ]
    print("PART 1:", part1(inp))
    print("PART 2:", part2(content))
def main():
    with open(get_filename()) as file:
        parts = file.read().split("\n\n")
    fields = [Field(field) for field in parts[0].split("\n")]
    my_ticket = Ticket(parts[1].split("\n")[1], fields)
    tickets = [
        Ticket(ticket, fields)
        for ticket in parts[2].split("\n")[1:]
        if ticket
    ]
    print("PART 1:", part1(tickets))
    print("PART 2:", part2(my_ticket, [ticket for ticket in tickets if ticket.valid]))
def main():
    with open(get_filename()) as file:
        instructions: ty.List[ty.List[ty.Union[str, ty.Tuple[int, int]]]] = []
        for line in file:
            if line.startswith("mask"):
                instructions.append([mask_pattern.match(line)["mask"]])
            else:
                match = mem_pattern.match(line)
                instructions[-1].append((int(match["address"]), int(match["val"])))
    print("PART 1:", part1(instructions))
    print("PART 2:", part2(instructions))
def mapper(self, _, name):
    row = set()
    filename = get_filename(name)
    with open(main_path + "links/" + filename, "r", encoding="utf-8") as file:
        for line in file:
            line = line.rstrip()
            if line in indexes:
                row.add(indexes[line])
    yield indexes[name], list(row)
def merge_log_to_missnp(output_file):
    """
    Takes in a .log file and a .missnp file and merges them together.
    A separate output file containing the merged contents is written.

    :param output_file: The output flag passed in as a command line argument.
    :return: The name of the merged file, or an empty string if neither the
        .log file nor the .missnp file is present (or there was nothing to merge).
    :rtype: String
    """
    output_file_root_dir = util.get_root_path(output_file)
    output_file_name = util.get_filename(output_file)
    input_logfile = '{}.log'.format(output_file)
    input_missnp = '{}-merge.missnp'.format(output_file)
    merged_missnp_output = '{}_{}'.format(output_file, 'MERGED_LOG_MISSNP.txt')
    merged_missnp_output_lines = list()
    missing_logfile = False
    missing_missnp_file = False
    try:
        with open(input_missnp, 'r') as missnp:
            merged_missnp_output_lines += missnp.readlines()
    except FileNotFoundError:
        missing_missnp_file = True
        print('.missnp file [ {} ] does not exist. Excluding from merge...'.format(input_missnp))
    try:
        with open(input_logfile, 'r') as logfile_in:
            for line in logfile_in:
                if line.startswith('Warning:'):
                    rs_id = re.search('rs[0-9]+', line)  # regular expression to grab rsIDs
                    if rs_id:
                        rs_id = rs_id.group(0)
                        # id = line.split('rs', 1)[1]  # gets the snp id
                        rs_id = rs_id.strip('\n')
                        rs_id = rs_id.strip("'.")
                        merged_missnp_output_lines.append(rs_id + '\n')  # append to missnp file
    except FileNotFoundError:
        print('Log file [ {} ] does not exist.'.format(input_logfile))
        missing_logfile = True
    if (missing_missnp_file and missing_logfile) or len(merged_missnp_output_lines) < 1:
        return ''
    with open(merged_missnp_output, 'w+') as merged_output:
        for line in merged_missnp_output_lines:
            merged_output.write(line)
    return merged_missnp_output
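# Hypothetical usage (not part of the original code): the prefix below is an assumption;
# in practice it would be the --out prefix passed to PLINK's merge step.
merged_file = merge_log_to_missnp('output/merged_dataset')
if merged_file:
    print('Problematic variant IDs written to', merged_file)
else:
    print('No .log or .missnp input found; nothing was merged.')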
def main():
    with open(get_filename()) as file:
        lines = file.readlines()
    active_states_dim3 = set()
    active_states_dim4 = set()
    for x, rows in enumerate(lines):
        for y, elem in enumerate(rows):
            if elem == "#":
                active_states_dim3.add((x, y, 0))
                active_states_dim4.add((x, y, 0, 0))
    print("PART 1:", solve(active_states_dim3, 6))
    print("PART 2:", solve(active_states_dim4, 6))
def main():
    global pattern
    with open(get_filename()) as file:
        content = file.read().split("\n\n")
    rules = {
        int(index): parse_rule(rule)
        for index, rule in map(lambda r: r.split(":"), content[0].split("\n"))
    }
    messages = [message for message in content[1].split("\n")]
    print("PART 1:", solve(rules, messages))
    rules[8] = [[42], [42, 8]]
    rules[11] = [[42, 31], [42, 11, 31]]
    pattern = ""
    print("PART 2:", solve(rules, messages))
def swap_redirects(pages: list):
    '''
    Swap redirects for each page
    '''
    for page in pages:
        filename = os.path.join(root, links_path, get_filename(page))
        if os.path.exists(filename):
            with open(filename, 'r', encoding='utf-8') as f:
                links = f.read().split('\n')
            for i, link in enumerate(links):
                if link in redirects:
                    links[i] = redirects[link]
            with open(filename, 'w', encoding='utf-8') as f:
                f.write('\n'.join(links))
def run_model(cl):
    """
    @param cl: The command line index from which to consider the model command.
               sys.argv[cl] must be '-m' and sys.argv[cl + 1] must be one of the valid models.
    """
    try:
        cl1 = str(sys.argv[cl])
        cl2 = str(sys.argv[cl + 1])
    except IndexError:
        command_line_syntax('Please choose a model!')
        sys.exit(0)
    assert cl1 == '-m', command_line_syntax('You must enter -m to choose the model!')
    assert cl2 in valid_models, command_line_syntax('You have chosen an invalid model!')

    # First read in the data
    print('Reading in data...', end=' ')
    with open(util.get_filename(), 'r') as f:
        dataset = pd.read_csv(f)
    print('done!')

    # Then create the features
    # X_train, y_train, X_test, y_test = get_features(dataset, max_features=5000)
    X_train, y_train, X_test, y_test = numerical_features(dataset)

    # Then run models based on what the argument says
    if cl2 == 'log':
        print('Training logistic regression model...')
        logC = train_logistic(X_train, y_train, X_test, y_test)
    elif cl2 == 'rfc':
        print('Training random forest classifier...')
        RFC = trainRandomForest(X_train, y_train, X_test, y_test)
    elif cl2 == 'nn':
        print('Training Neural Net...')
        NN = trainNeuralNet(X_train, y_train, X_test, y_test)
    elif cl2 == 'baseline':
        training_set = [(x, y) for x, y in zip(X_train, y_train)]
        blC = classifiers.Baseline(training_set, class_labels=range(10), debug=True)
        blC.stochastic_grad_descent()
        y_pred = numpy.array([blC.predict(x) for x in X_test])
        # print(y_pred)
        # print(y_test)
        print(classification_report(y_test, y_pred))
        print("accuracy score =", accuracy_score(y_test, y_pred))
def get_rsIDs_from_dataset(dataset, num_rsids=0):
    """
    Removes '.' placeholder IDs from the binary file input, keeping only rsIDs.

    :param dataset: The path to the .bed/.bim/.fam files whose '.' rsIDs are to be removed.
    :param num_rsids: The number of rsIDs to extract from the dataset (0 means all).
    :rtype: str
    """
    root_path = util.get_root_path(dataset)
    dataset_filename = util.get_filename(dataset)
    dataset_bim = util.get_bed_bim_fam_from_bfile(dataset)['bim']
    temp_extract_file = 'extract_{}.txt'.format(dataset_filename)
    output_file = '{}{}_{}'.format(root_path, dataset_filename, 'RS_ONLY')
    output_lines = set()
    with open(dataset_bim, 'r') as input_file:
        file_lines = input_file.readlines()
        if num_rsids > 0:
            file_lines = file_lines[:num_rsids]
        for line in file_lines:
            # print(line)
            if '.' not in line and not line.startswith('MT'):
                rs_id = re.search('rs[0-9]+', line)
                if rs_id:
                    rs_id = rs_id.group(0).strip()
                    output_lines.add(rs_id + '\n')
    with open(temp_extract_file, 'w+') as output:
        for line in output_lines:
            output.write(line)
    get_rs_ids_command = {
        'bfile': dataset,
        'extract': temp_extract_file,
        'out': output_file
    }
    util.call_plink(
        get_rs_ids_command,
        command_key='Get only rsIDs from input .bim file [ {} ]'.format(dataset_filename))
    # os.remove(temp_extract_file)
    return output_file
def main():
    with open(get_filename()) as file:
        inp = [line.strip() for line in file]
    print(max(get_seats(inp)))
    print(get_missing_seat(inp))
def process_file(filepath, config, lang, subclasses, classes, depends):
    filename = util.get_filename(filepath)
    if filepath.endswith(".hpp.inc"):
        special(filepath, filename[:-len(".hpp.inc")], "hinc", classes, depends)
        return
    if filepath.endswith(".cpp.inc"):
        special(filepath, filename[:-len(".cpp.inc")], "cinc", classes, depends)
        return
    fileroot = os.path.splitext(filename)[0]
    current_enum = ""
    current_class = ""
    json = False
    lineno = 1
    for line in preprocess_file(filepath):
        lineno += 1
        fields = line.split()
        if regex.match(r"^ *$", line):
            continue

        # Find any enums in the java class
        #
        match = regex.match(r" *(public|private) enum .*", line)
        if match:
            current_enum = fields[2]
            if fileroot in config.EnumMap and current_enum in config.EnumMap[fileroot]:
                current_enum = config.EnumMap[fileroot][current_enum]
            classes[current_enum].class_name = current_enum
            classes[current_enum].enum = True
            classes[current_enum].elements = []
        if current_enum != "":
            line = regex.sub(r"\([^)]+\)", " ", line)
            fields = line.split()
        match = regex.match(r"^[A-Z0-9_]+[,;]?$", fields[0])
        if current_enum != "" and match:
            line = regex.sub(r"//.*$", "", line)
            line = regex.sub(r"[{;]", "", line)
            names = line.split(",")
            for name in names:
                name = name.strip()
                if name == "":
                    continue
                name = name.split()[0]
                classes[current_enum].elements.append(util.attrdict(element=name))
                classes[current_enum].elements[-1]._N = len(classes[current_enum].elements)
        match = regex.match(r".*;$|^}$|^};$", line)
        if current_enum != "" and match:
            classes[current_enum].elements[-1]._last = True
            current_enum = ""

        # Use the JsonCreator as the definition of a class
        #
        match = regex.match(r".*@JsonCreator.*", line)
        if match:
            json = True
        line = regex.sub(r"[()]", " ", line)
        fields = line.split()
        match = regex.match(r" *public.*", line)
        if json and match:
            current_class = fields[1] if fields[1] != "static" else fields[2]
            classes[current_class].class_name = current_class
            classes[current_class].struct = True
            classes[current_class].fields = []
            if current_class in subclasses:
                classes[current_class].subclass = True
                classes[current_class].super_class = subclasses[current_class].super
                classes[current_class].json_key = subclasses[current_class].key
        match = regex.match(r" *@JsonProperty.*", line)
        if json and match and len(fields) >= 3:
            line = regex.sub(r"^[^@]*", "", line)
            line = regex.sub(r"@Nullable", "", line)
            fields = line.split()
            fields[-1] = regex.sub(r",", "", fields[-1])
            if fields[1][0] == '"':
                type = " ".join(fields[2:-1])
                name = regex.sub('"', "", fields[1])
            else:
                type = " ".join(fields[1:-1])
                name = fields[-1]
            add_field(current_class, fileroot, name, type, config, lang, classes, depends)
        match = regex.match(r" *{ *", line)
        if json and match:
            add_extra(current_class, fileroot, config, lang, classes, depends)
            if len(classes[current_class].fields) == 0:
                classes.pop(current_class)
                json = False
                continue
            json = False
    return classes
def mapper(self, _, page):
    with open(os.path.join(data_path, 'links', get_filename(page)),
              mode='r', encoding='utf-8') as f:
        yield 'lines', sum(1 for _ in f)
def main():
    with open(get_filename()) as file:
        inp = [group.splitlines() for group in file.read().split("\n\n")]
    print(parse(inp, set.union))
    print(parse(inp, set.intersection))
def train_model(args: dict, hparams: dict):
    # Code for this function adapted from https://mccormickml.com/2019/07/22/BERT-fine-tuning/
    pos_file = args.pos_file
    neg_file = args.neg_file
    truncation = args.truncation
    n_samples = args.n_samples
    seed_val = hparams["seed_val"]
    device = util.get_device(device_no=args.device_no)

    saves_dir = "saves/"
    Path(saves_dir).mkdir(parents=True, exist_ok=True)
    time = datetime.datetime.now()
    saves_path = os.path.join(saves_dir, util.get_filename(time))
    Path(saves_path).mkdir(parents=True, exist_ok=True)

    log_path = os.path.join(saves_path, "training.log")
    logging.basicConfig(filename=log_path,
                        filemode='w',
                        format='%(name)s - %(levelname)s - %(message)s')
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    logger.info("Pos file: " + str(pos_file))
    logger.info("Neg file: " + str(neg_file))
    logger.info("Parameters: " + str(args))
    logger.info("Truncation: " + truncation)

    # Load the BERT tokenizer.
    logger.info('Loading BERT tokenizer...')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

    max_len = 0
    reviews, labels = util.read_samples_new(filename0=neg_file,
                                            filename1=pos_file,
                                            seed_val=seed_val,
                                            n_samples=n_samples,
                                            sentence_flag=True)
    print(len(reviews), len(labels))

    # Tokenize all of the sentences and map the tokens to their word IDs.
    input_ids = []
    attention_masks = []

    # For every sentence...
    for rev in reviews:
        # `encode_plus` will:
        #   (1) Tokenize the sentence.
        #   (2) Prepend the `[CLS]` token to the start.
        #   (3) Append the `[SEP]` token to the end.
        #   (4) Map tokens to their IDs.
        #   (5) Pad or truncate the sentence to `max_length`.
        #   (6) Create attention masks for [PAD] tokens.
        input_id = tokenizer.encode(rev, add_special_tokens=True)
        if len(input_id) > 512:
            if truncation == "tail-only":
                # tail-only truncation
                input_id = [tokenizer.cls_token_id] + input_id[-511:]
            elif truncation == "head-and-tail":
                # head-and-tail truncation
                input_id = ([tokenizer.cls_token_id] + input_id[1:129]
                            + input_id[-382:] + [tokenizer.sep_token_id])
            else:
                # head-only truncation
                input_id = input_id[:511] + [tokenizer.sep_token_id]
            input_ids.append(torch.tensor(input_id).view(1, -1))
            attention_masks.append(torch.ones([1, len(input_id)], dtype=torch.long))
        else:
            encoded_dict = tokenizer.encode_plus(
                rev,                          # Sentence to encode.
                add_special_tokens=True,      # Add '[CLS]' and '[SEP]'.
                max_length=512,               # Pad & truncate all sentences.
                pad_to_max_length=True,
                return_attention_mask=True,   # Construct attn. masks.
                return_tensors='pt',          # Return pytorch tensors.
            )
            # Add the encoded sentence to the list.
            input_ids.append(encoded_dict['input_ids'])
            # And its attention mask (simply differentiates padding from non-padding).
            attention_masks.append(encoded_dict['attention_mask'])

    # Convert the lists into tensors.
    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    labels = torch.tensor(labels)

    # Combine the training inputs into a TensorDataset.
    dataset = TensorDataset(input_ids, attention_masks, labels)

    # Create a 90-10 train-validation split.
    # Calculate the number of samples to include in each set.
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size

    # Divide the dataset by randomly selecting samples.
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    logger.info('{:>5,} training samples'.format(train_size))
    logger.info('{:>5,} validation samples'.format(val_size))

    # The DataLoader needs to know our batch size for training, so we specify it here.
    # For fine-tuning BERT on a specific task, the authors recommend a batch size of 16 or 32.
    batch_size = hparams["batch_size"]

    # Create the DataLoaders for our training and validation sets.
    # We'll take training samples in random order.
    train_dataloader = DataLoader(
        train_dataset,                         # The training samples.
        sampler=RandomSampler(train_dataset),  # Select batches randomly.
        batch_size=batch_size                  # Trains with this batch size.
    )

    # For validation the order doesn't matter, so we'll just read them sequentially.
    validation_dataloader = DataLoader(
        val_dataset,                             # The validation samples.
        sampler=SequentialSampler(val_dataset),  # Pull out batches sequentially.
        batch_size=batch_size                    # Evaluate with this batch size.
    )

    # Load BertForSequenceClassification, the pretrained BERT model with a single
    # linear classification layer on top.
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",        # Use the 12-layer BERT model, with an uncased vocab.
        num_labels=2,               # The number of output labels--2 for binary classification.
                                    # You can increase this for multi-class tasks.
        output_attentions=False,    # Whether the model returns attentions weights.
        output_hidden_states=False, # Whether the model returns all hidden-states.
    )

    # Tell pytorch to run this model on the GPU.
    model = model.to(device=device)
    # model.cuda(device=device)

    # Note: AdamW is a class from the huggingface library (as opposed to pytorch);
    # the 'W' stands for 'Weight Decay fix'.
    optimizer = AdamW(
        model.parameters(),
        lr=hparams["learning_rate"],  # args.learning_rate - default is 5e-5, our notebook had 2e-5.
        eps=hparams["adam_epsilon"]   # args.adam_epsilon - default is 1e-8.
    )

    # Number of training epochs. The BERT authors recommend between 2 and 4.
    # We chose to run for 4, but we'll see later that this may be over-fitting the training data.
    epochs = 4

    # Total number of training steps is [number of batches] x [number of epochs].
    # (Note that this is not the same as the number of training samples).
    total_steps = len(train_dataloader) * epochs

    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # Default value in run_glue.py
        num_training_steps=total_steps)

    # This training code is based on the `run_glue.py` script here:
    # https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

    # Set the seed value all over the place to make this reproducible.
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    # We'll store a number of quantities such as training and validation loss,
    # validation accuracy, and timings.
    training_stats = []

    # For each epoch...
    for epoch_i in range(0, epochs):

        # ========================================
        #               Training
        # ========================================

        # Perform one full pass over the training set.
        logger.info("")
        logger.info('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        logger.info('Training...')

        # Reset the total loss for this epoch.
        total_train_loss = 0

        # Put the model into training mode. Don't be misled--the call to
        # `train` just changes the *mode*, it doesn't *perform* the training.
        # `dropout` and `batchnorm` layers behave differently during training
        # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)
        model.train()

        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):

            # Progress update every 40 batches.
            if step % 40 == 0 and not step == 0:
                # Report progress.
                logger.info('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_dataloader)))

            # Unpack this training batch from our dataloader.
            #
            # As we unpack the batch, we'll also copy each tensor to the GPU using
            # the `to` method.
            #
            # `batch` contains three pytorch tensors:
            #   [0]: input ids
            #   [1]: attention masks
            #   [2]: labels
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            # Always clear any previously calculated gradients before performing a
            # backward pass. PyTorch doesn't do this automatically because
            # accumulating the gradients is "convenient while training RNNs".
            # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
            model.zero_grad()

            # Perform a forward pass (evaluate the model on this training batch).
            # The documentation for this `model` function is here:
            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
            # It returns different numbers of parameters depending on what arguments
            # are given and what flags are set. For our usage here, it returns
            # the loss (because we provided labels) and the "logits"--the model
            # outputs prior to activation.
            loss, logits = model(b_input_ids,
                                 token_type_ids=None,
                                 attention_mask=b_input_mask,
                                 labels=b_labels)

            # Accumulate the training loss over all of the batches so that we can
            # calculate the average loss at the end.
            total_train_loss += loss.detach().cpu().numpy()

            # Perform a backward pass to calculate the gradients.
            loss.backward()

            # Clip the norm of the gradients to 1.0.
            # This is to help prevent the "exploding gradients" problem.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters and take a step using the computed gradient.
            # The optimizer dictates the "update rule"--how the parameters are
            # modified based on their gradients, the learning rate, etc.
            optimizer.step()

            # Update the learning rate.
            scheduler.step()

        # Calculate the average loss over all of the batches.
        avg_train_loss = total_train_loss / len(train_dataloader)

        logger.info("")
        logger.info("  Average training loss: {0:.2f}".format(avg_train_loss))

        # ========================================
        #               Validation
        # ========================================
        # After the completion of each training epoch, measure our performance on
        # our validation set.

        logger.info("")
        logger.info("Running Validation...")

        # Put the model in evaluation mode--the dropout layers behave differently
        # during evaluation.
        model.eval()

        # Tracking variables
        total_eval_accuracy = 0
        total_eval_loss = 0

        # Evaluate data for one epoch
        for batch in validation_dataloader:

            # Unpack this validation batch from our dataloader and copy each
            # tensor to the GPU using the `to` method.
            #
            # `batch` contains three pytorch tensors:
            #   [0]: input ids
            #   [1]: attention masks
            #   [2]: labels
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            # Tell pytorch not to bother with constructing the compute graph during
            # the forward pass, since this is only needed for backprop (training).
            with torch.no_grad():
                # Forward pass, calculate logit predictions.
                # token_type_ids is the same as the "segment ids", which
                # differentiates sentence 1 and 2 in 2-sentence tasks.
                # The documentation for this `model` function is here:
                # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
                # Get the "logits" output by the model. The "logits" are the output
                # values prior to applying an activation function like the softmax.
                (loss, logits) = model(b_input_ids,
                                       token_type_ids=None,
                                       attention_mask=b_input_mask,
                                       labels=b_labels)

            # Accumulate the validation loss.
            total_eval_loss += loss.detach().cpu().numpy()

            # Move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            # Calculate the accuracy for this batch of test sentences, and
            # accumulate it over all batches.
            total_eval_accuracy += flat_accuracy(logits, label_ids)

        # Report the final accuracy for this validation run.
        avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
        logger.info("  Accuracy: {0:.2f}".format(avg_val_accuracy))

        # Calculate the average loss over all of the batches.
        avg_val_loss = total_eval_loss / len(validation_dataloader)
        logger.info("  Validation Loss: {0:.2f}".format(avg_val_loss))

        # Record all statistics from this epoch.
        training_stats.append({
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
        })

        model_save_path = os.path.join(saves_path, "model_" + str(epoch_i + 1) + "epochs")
        torch.save(model, model_save_path)

    logger.info("")
    logger.info("Training complete!")

    handlers = logger.handlers[:]
    for handler in handlers:
        handler.close()
        logger.removeHandler(handler)
def load(self):
    levels = self.get_levels()
    if self.level_id > len(levels) - 1:
        self.app.set_state(main.WIN)
        self.app.score_manager.save()
    else:
        self.app.gui_manager.set_state(gui_manager.FADE_IN)
        self.app.game_manager.clear_level()
        self.level_name = levels[self.level_id]
        self.app.gui_manager.update_times(
            self.app.score_manager.run_scores.get(
                util.get_filename(self.mode, self.level_name), 0),
            self.app.score_manager.get_record(self.mode, self.level_name))
        # map_data = levels.levels[self.level_id]()
        if self.mode == 0:
            directory = 'levels'
        elif self.mode == 1:
            directory = 'survival'
        with open('{}/{}.dat'.format(directory, self.level_name), 'rb') as f:
            map_data = pickle.load(f)
        scene = self.app.renderer.scene
        model_name, texture = map_data['terrain']
        self.app.game_manager.terrain = terrain.Terrain(self.app, model_name, texture)
        scene.add(self.app.game_manager.terrain.canvas)
        self.app.game_manager.player.spawn(map_data['spawn_pos'])
        if self.mode == 0:
            self.app.game_manager.goal.spawn(map_data['goal_pos'])
        elif self.mode == 1:
            self.app.game_manager.goal.despawn()
        for data in map_data['buildings']:
            b = building.Building(self.app, building.Building.data[data[0]], data[1:4], data[4])
            # b = building.Building(self.app, building.Building.data[data[0]], data[1:4], 0)
            self.app.game_manager.game_objects.add(b)
            scene.add(b.canvas)
        for data in map_data['platforms']:
            if data[0] == 0:
                platform.Hedge(self.app, data[1:4])
            elif data[0] == 1:
                platform.InvisiblePlatform(self.app, data[1:4])
            elif data[0] == 2:
                platform.LavaPlatform(self.app, data[1:4])
            elif data[0] == 3:
                platform.Trampoline(self.app, data[1:4])
        for data in map_data['elevators']:
            e = elevator.Elevator(self.app, data[1:4], data[4])
            self.app.game_manager.game_objects.add(e)  # spawn later
            scene.add(e.canvas)
        for data in map_data['powerups']:
            if data[0] == 0:
                e = powerup.Fuel(self.app, data[1:4])
                self.app.game_manager.game_objects.add(e)  # spawn later
                scene.add(e.canvas)
            elif data[0] == 1:
                e = powerup.Health(self.app, data[1:4])
                self.app.game_manager.game_objects.add(e)  # spawn later
                scene.add(e.canvas)
            elif data[0] == 2:
                e = powerup.SlowTime(self.app, data[1:4])
                self.app.game_manager.game_objects.add(e)  # spawn later
                scene.add(e.canvas)
        for data in map_data['vehicles']:
            if data[0] == 0:
                v = car.Car(self.app)
            elif data[0] == 1:
                v = helicopter.Helicopter(self.app)
            v.spawn(data[1:4])
        for data in map_data['enemies']:
            if data[0] == 0:
                enemy.Turret(self.app, data[1:4])
            elif data[0] == 1:
                enemy.Bee(self.app, data[1:4])
            elif data[0] == 2:
                enemy.BowlSpawner(self.app, data[1:4])
            elif data[0] == 3:
                enemy.InvisibleEnemy(self.app, data[1:4])
        self.app.game_manager.set_state(self.mode)
def get_level_name(self):
    return util.get_filename(self.mode, self.level_name)
def fit_model(data, n_topics, iterations, passes, min_prob, eval_every,
              n_best, min_df, max_df, preserved_words):
    dt = cur_date()
    output_folder = "lda_%stopics_%s" % (n_topics, dt)
    os.makedirs(output_folder, exist_ok=True)
    os.makedirs("%s/separate" % output_folder, exist_ok=True)

    logging.info("creating corpus...")
    dictionary, corpus = make_corpus(list(data.values()), min_df, max_df,
                                     preserved_words, output_folder)

    # generate LDA model
    logging.info("training model...")
    lda = LdaModel(corpus,
                   num_topics=n_topics,
                   id2word=dictionary,
                   iterations=iterations,
                   passes=passes,
                   minimum_probability=min_prob,
                   eval_every=eval_every)
    logging.info("saving model...")
    lda.save('saved/lda_%s_%s.serialized' % (n_topics, dt))
    # print(lda.print_topics(num_topics=n_topics, num_words=4))

    # save all-vs-all pairwise similarities
    logging.info("creating index...")
    index = Similarity('./sim_index', lda[corpus], num_features=n_topics, num_best=n_best + 1)
    paths = list(data.keys())

    logging.info("write all similarities to result file")
    with open('%s/similarities.txt' % output_folder, 'w') as res_file:
        with open('%s/similarities_summary.txt' % output_folder, 'w', encoding='utf-8') as res_file_sum:
            for i, similarities in enumerate(index):
                cur_fname = get_filename(paths[i])
                top_similar = [(paths[s[0]], s[1]) for s in similarities if s[0] != i]
                res_file.write('%s: %s\n' % (cur_fname,
                                             [(get_filename(p), c) for (p, c) in top_similar]))
                res_file_sum.write('%s: %s\n' % (cur_fname, get_title(paths[i])))
                for sim in top_similar:
                    res_file_sum.write('%s: %s' % (get_filename(sim[0]), get_title(sim[0])))
                res_file_sum.write('-' * 100 + '\n')
                # for each doc we make a separate file which contains a list of similar docs
                with open('%s/separate/%s.txt' % (output_folder, cur_fname.split('.')[0]),
                          'w') as sep_res:
                    for sim in top_similar:
                        sep_res.write('%s\n' % get_filename(sim[0]))
    logging.info("save index")
    index.save('saved/lda_index_%s.index' % dt)

    # save topic - words matrix
    with open("%s/topic_words.txt" % output_folder, 'w', encoding='utf-8') as f:
        for topic_words in lda.print_topics(lda.num_topics):
            f.write("#%s: %s\n" % (topic_words[0], topic_words[1]))

    # save document - topics matrix
    with open("%s/document_topics.txt" % output_folder, 'w') as f:
        for i, topics in enumerate(lda[corpus]):
            f.write("#%s: %s\n" % (get_filename(paths[i]), topics))

    # save dictionary
    dictionary.save_as_text("%s/dictionary.txt" % output_folder)
def get_pair(prefix):
    files = glob.glob(weatherdir + 'dd_temp_eng/{0}*.csv'.format(prefix))
    pairs = [get_bs(util.get_filename(x)) for x in files]
    return pairs