Example no. 1
def setup_nodes(net: mininet.net.Mininet, configs):
    # hard-coded TCP ports for the iperf connections
    tcp_port1 = 9998
    tcp_port2 = 9999 if configs.h2 else None
    h1 = net.get("h1")
    h3 = net.get("h3")
    h1_result = get_filename(h1, configs)
    h2_result = get_filename("h2", configs)
    # remove any existing result files from previous runs
    for filename in {h1_result, h2_result}:
        if os.path.exists(filename):
            os.remove(filename)
    h3_proc = multiprocessing.Process(target=setup_mininet_iperf_server,
                                      args=(h3, tcp_port1, tcp_port2, configs))
    h1_proc = multiprocessing.Process(target=setup_client,
                                      args=(h1, h3, configs, tcp_port1,
                                            h1_result))
    if configs.h2:
        h2 = net.get("h2")
        h2_proc = multiprocessing.Process(target=setup_client,
                                          args=(h2, h3, configs, tcp_port2,
                                                h2_result, configs.h2_cc))
    else:
        h2_proc = None

    h3_proc.start()
    time.sleep(2 if configs.h2 else 0.5)
    h1_proc.start()
    if configs.h2:
        h2_proc.start()

    processes = [h3_proc, h1_proc, h2_proc]
    return processes
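The result paths above come from get_filename(host, configs), which is defined elsewhere. A minimal sketch of what such a helper might look like, assuming it only combines the host label with a setting read from configs (the cc attribute and the results directory below are invented for illustration):

import os

def get_filename(host, configs):
    # Hypothetical sketch only: build a per-host result path from the host
    # (a Mininet node or its name) and a congestion-control label taken from
    # configs. Both the "cc" attribute and the "results" directory are assumptions.
    host_label = host if isinstance(host, str) else host.name
    cc = getattr(configs, "cc", "default")
    return os.path.join("results", "{}_{}.txt".format(host_label, cc))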
Example no. 2
def save_product(product_info, imgs_no_downloand):
    from models import GameImage, PricesGame, Game
    from django.db.models import Q
    import requests
    prices = None
    imgs_downloand = None
    game = None
    try:
        if 'gift' not in product_info:
            product_info['gift'] = None
        if 'stock' not in product_info:
            from product import STOCK_CHOICE
            product_info['stock'] = STOCK_CHOICE.get('reserva')
        if 'pegi' not in product_info:
            product_info['pegi'] = None

        if not Game.objects.filter(Q(name=product_info['title']) &
                Q(plataform=product_info['platform'])).exists():
            imgs_downloand = []
            if imgs_no_downloand:
                for img in imgs_no_downloand:
                    filename = get_filename(img)
                    request_imagen = requests.get(img)
                    if request_imagen.status_code == 200:
                        image = GameImage(name=product_info['title'])
                        image.save_image(filename, request_imagen.content)
                        imgs_downloand.append(image)
            product_info['imagenes'] = imgs_downloand if imgs_downloand else None
            img = product_info['src']
            filename = get_filename(img)
            request = requests.get(img)
            if request.status_code == 200:
                image = GameImage(name=product_info['title'][:15])
                image.save_image("main." + filename.split('.')[1], request.content)
                product_info['imagen'] = image
            else:
                product_info['imagen'] = None
            prices = PricesGame()
            prices.add_price(product_info)
            product_info['prices'] = prices
            game = Game()
            game.add_game(product_info)
        else:
            prices = PricesGame()
            prices.add_price(product_info)
            product_info['prices'] = prices
            game = Game.objects.get(Q(name=product_info['title'])&Q(plataform=product_info['platform']))
            game.prices.add(prices)
    except Exception:
        if prices:
            prices.delete()
        if 'imagen' in product_info:
            product_info['imagen'].delete()
        if imgs_downloand:
            for imagen in imgs_downloand:
                imagen.delete()
        if game:
            game.delete()
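get_filename(img) is applied here to image URLs, and the extension of its result is reused when naming the main image. The helper itself is not shown; a minimal sketch, assuming it simply keeps the last path component of the URL:

import os
from urllib.parse import urlparse

def get_filename(url):
    # Hypothetical sketch only: keep the last path component of the image URL,
    # e.g. "https://cdn.example.com/covers/zelda.jpg" -> "zelda.jpg".
    return os.path.basename(urlparse(url).path)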
Example no. 3
def check_output(configs):
    # check that the output files were generated properly
    h1_result = get_filename("h1", configs)
    h2_result = get_filename("h2", configs)
    results = [h1_result]
    if configs.h2:
        results.append(h2_result)
    for filename in results:
        get_iperf_metrics(filename)
Example no. 4
def main():
    with open(get_filename()) as file:
        inp = sorted([int(line.strip()) for line in file])
        inp = [0, *inp, inp[-1] + 3]

    print(part1(inp))
    print(part2(inp))
Example no. 5
def main():
    with open(get_filename()) as file:
        inp = [(line[0], int(line[1:]))
               for line in (line.strip() for line in file)]

    print("PART 1:", solve(inp, 1 + 0j, "a"))
    print("PART 2:", solve(inp, 10 + 1j, "b"))
Example no. 6
    def mapper(self, _, page):
        with open(os.path.join(data_path, 'links', get_filename(page)), mode='r', encoding='utf-8') as f:
            links = sum(1 for _ in f)

            yield 'degree', links
            yield 'max', links
            yield 'min', links
Example no. 7
def main():
    args = parse_args()
    threshold1 = args.Tlow
    threshold2 = 2 * args.Tlow  # Canny recommends a ratio of 1:2
    win1 = args.win1
    win2 = args.win2
    imgsdir = args.imgsdir
    if not os.path.isdir(imgsdir):
        imgpaths = [imgsdir]
    else:
        imgpaths = util.get_imgpaths(imgsdir, n=args.n)
    for i, imgpath in enumerate(imgpaths):
        print("({0}/{1}): Image={2}".format(i + 1, len(imgpaths), imgpath))
        I = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_GRAYSCALE)
        line1, line2 = detect_lanes(I,
                                    threshold1=threshold1,
                                    threshold2=threshold2,
                                    apertureSize=args.ksize)
        if line1 is None and line2 is None:
            print("    Error: Couldn't find lanes.")
            continue
        if line1 is None:
            print("    Error: Couldn't find left lane.")
        if line2 is None:
            print("    Error: Couldn't find right lane.")
        #Irgb = plot_lines(I, line1, line2)
        Irgb = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_COLOR)
        Irgb = util_camera.draw_line(Irgb, line1, (255, 0, 0))
        Irgb = util_camera.draw_line(Irgb, line2, (0, 255, 0))
        # Draw subwindows on image
        Irgb = draw_subwindow(Irgb, win1, colour=(255, 0, 0))
        Irgb = draw_subwindow(Irgb, win2, colour=(0, 255, 0))
        cv2.imwrite('{0}_lines.png'.format(util.get_filename(imgpath)), Irgb)
        print "    LeftLane: {0}    RightLane: {1}".format(line1, line2)
    print("Done.")
Example no. 8
def main():
    args = parse_args()
    threshold1 = args.Tlow
    threshold2 = 2 * args.Tlow    # Canny recommends a ratio of 1:2
    win1 = args.win1
    win2 = args.win2
    imgsdir = args.imgsdir
    if not os.path.isdir(imgsdir):
        imgpaths = [imgsdir]
    else:
        imgpaths = util.get_imgpaths(imgsdir, n=args.n)
    for i, imgpath in enumerate(imgpaths):
        print("({0}/{1}): Image={2}".format(i+1, len(imgpaths), imgpath))
        I = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_GRAYSCALE)
        line1, line2 = detect_lanes(I, threshold1=threshold1, threshold2=threshold2, apertureSize=args.ksize)
        if line1 is None and line2 is None:
            print("    Error: Couldn't find lanes.")
            continue
        if line1 is None:
            print("    Error: Couldn't find left lane.")
        if line2 is None:
            print("    Error: Couldn't find right lane.")
        #Irgb = plot_lines(I, line1, line2)
        Irgb = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_COLOR)
        Irgb = util_camera.draw_line(Irgb, line1, (255, 0, 0))
        Irgb = util_camera.draw_line(Irgb, line2, (0, 255, 0))
        # Draw subwindows on image
        Irgb = draw_subwindow(Irgb, win1, colour=(255, 0, 0))
        Irgb = draw_subwindow(Irgb, win2, colour=(0, 255, 0))
        cv2.imwrite('{0}_lines.png'.format(util.get_filename(imgpath)), Irgb)
        print "    LeftLane: {0}    RightLane: {1}".format(line1, line2)
    print("Done.")
Example no. 9
def main():
    with open(get_filename()) as file:
        inp = [int(line.strip()) for line in file]

    invalid_num = part1(inp, 25)
    print(invalid_num)
    print(part2(inp, invalid_num))
Example no. 10
def main():
    with open(get_filename()) as file:
        inp = [line.strip() for line in file]

    print(slide(inp, 3, 1))
    print(
        slide(inp, 1, 1) * slide(inp, 3, 1) * slide(inp, 5, 1) *
        slide(inp, 7, 1) * slide(inp, 1, 2))
Example no. 11
def delete_redirect_files(redirects: dict):
    ''' Remove all files only containing redirects
    '''
    for redirect in redirects.keys():
        filename = os.path.join(root, links_path, get_filename(redirect))

        if os.path.exists(filename):
            os.remove(filename)
Example no. 12
def main():
    with open(get_filename()) as file:
        inp = file.readlines()
        entries = [
            int(entry) if entry != "x" else None for entry in inp[1].split(",")
        ]

    print("PART 1:", part1(int(inp[0]), entries))
    print("PART 2:", part2(entries))
Example no. 13
def main():
    with open(get_filename()) as file:
        content = file.readlines()
        inp = [
            Node.from_string(line.strip().replace(" ", "")) for line in content
        ]

    print("PART 1:", part1(inp))
    print("PART 2:", part2(content))
Example no. 14
def main():
    with open(get_filename()) as file:
        parts = file.read().split("\n\n")

    fields = [Field(field) for field in parts[0].split("\n")]
    my_ticket = Ticket(parts[1].split("\n")[1], fields)
    tickets = [Ticket(ticket, fields) for ticket in parts[2].split("\n")[1:] if ticket]

    print("PART 1:", part1(tickets))
    print("PART 2:", part2(my_ticket, [ticket for ticket in tickets if ticket.valid]))
Example no. 15
def main():
    with open(get_filename()) as file:
        instructions: ty.List[ty.List[ty.Union[str, ty.Tuple[int, int]]]] = []
        for line in file:
            if line.startswith("mask"):
                instructions.append([mask_pattern.match(line)["mask"]])
            else:
                match = mem_pattern.match(line)
                instructions[-1].append((int(match["address"]), int(match["val"])))

    print("PART 1:", part1(instructions))
    print("PART 2:", part2(instructions))
Example no. 16
    def mapper(self, _, name):
        row = set()
        filename = get_filename(name)

        with open(main_path + "links/" + filename, "r",
                  encoding="utf-8") as file:
            for line in file:
                line = line.rstrip()
                if line in indexes:
                    row.add(indexes[line])

        yield indexes[name], list(row)
Example no. 17
def merge_log_to_missnp(output_file):
    """
    Merges a .log file and a .missnp file into a single output file containing
    the .missnp entries plus the rsIDs flagged by warnings in the log.
    :param output_file: The output prefix passed in as a command line argument.
    :return: The name of the merged file, or an empty string if neither input file is present or there is nothing to merge.
    :rtype: String
    """
    output_file_root_dir = util.get_root_path(output_file)
    output_file_name = util.get_filename(output_file)

    input_logfile = '{}.log'.format(output_file)
    input_missnp = '{}-merge.missnp'.format(output_file)

    merged_missnp_output = '{}_{}'.format(output_file, 'MERGED_LOG_MISSNP.txt')
    merged_missnp_output_lines = list()
    missing_logfile = False
    missing_missnp_file = False

    try:
        with open(input_missnp, 'r') as missnp:
            merged_missnp_output_lines += missnp.readlines()
    except FileNotFoundError:
        missing_missnp_file = True
        print('.missnp file [ {} ] does not exist. Excluding from merge...'.
              format(input_missnp))

    try:
        with open(input_logfile, 'r') as logfile_in:
            for line in logfile_in:
                if line.startswith('Warning:'):
                    rs_id = re.search(
                        'rs[0-9]+', line)  # regular expression to grab rsID's
                    if rs_id:
                        rs_id = rs_id.group(0)
                        # id = line.split('rs', 1)[1]  # gets the snp id
                        rs_id = rs_id.strip('\n')
                        rs_id = rs_id.strip("'.")
                        merged_missnp_output_lines.append(
                            rs_id + '\n')  # append to missnp file
    except FileNotFoundError:
        print('Log file [ {} ] does not exist. '.format(input_logfile))
        missing_logfile = True

    if (missing_missnp_file
            and missing_logfile) or len(merged_missnp_output_lines) < 1:
        return ''
    else:
        with open(merged_missnp_output, 'w+') as merged_output:
            for line in merged_missnp_output_lines:
                merged_output.write(line)
        return merged_missnp_output
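A hypothetical usage sketch, assuming PLINK has already written out/merge_attempt.log and out/merge_attempt-merge.missnp next to the chosen output prefix (the prefix itself is invented for illustration):

if __name__ == '__main__':
    merged = merge_log_to_missnp('out/merge_attempt')
    if merged:
        print('Combined exclusion list written to {}'.format(merged))
    else:
        print('Nothing to merge.')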
Example no. 18
def main():
    with open(get_filename()) as file:
        lines = file.readlines()

    active_states_dim3 = set()
    active_states_dim4 = set()
    for x, rows in enumerate(lines):
        for y, elem in enumerate(rows):
            if elem == "#":
                active_states_dim3.add((x, y, 0))
                active_states_dim4.add((x, y, 0, 0))

    print("PART 1:", solve(active_states_dim3, 6))
    print("PART 2:", solve(active_states_dim4, 6))
Example no. 19
def main():
    global pattern
    with open(get_filename()) as file:
        content = file.read().split("\n\n")
    rules = {
        int(index): parse_rule(rule)
        for index, rule in map(lambda r: r.split(":"), content[0].split("\n"))
    }
    messages = content[1].split("\n")

    print("PART 1:", solve(rules, messages))
    rules[8] = [[42], [42, 8]]
    rules[11] = [[42, 31], [42, 11, 31]]
    pattern = ""
    print("PART 2:", solve(rules, messages))
Example no. 20
def swap_redirects(pages: list):
    ''' Swap redirects for each page
    '''
    for page in pages:
        filename = os.path.join(root, links_path, get_filename(page))
        
        if os.path.exists(filename):
            with open(filename, 'r', encoding='utf-8') as f:
                links = f.read().split('\n')

            for i, link in enumerate(links):
                if link in redirects:
                    links[i] = redirects[link]

            with open(filename, 'w', encoding='utf-8') as f:
                f.write('\n'.join(links))
Example no. 21
def run_model(cl):
	"""
	@param cl: The command-line index at which the model arguments start.
	The flag at that index must be '-m' and the argument after it must be
	one of the names in valid_models.
	"""
	try:
		cl1 = str(sys.argv[cl])
		cl2 = str(sys.argv[cl+1])
	except IndexError:
		command_line_syntax('Please choose a model!')
		sys.exit(0)

	assert cl1 == '-m', command_line_syntax('You must enter -m to choose the model!')
	assert cl2 in valid_models, command_line_syntax('You have chosen an invalid model!')

	# First read in the data
	print 'Reading in data...',
	with open(util.get_filename(), 'r') as f:
		dataset = pd.read_csv(f)
	print 'done!'

	# Then create the features
	# X_train, y_train, X_test, y_test = get_features(dataset, max_features=5000)
	X_train, y_train, X_test, y_test = numerical_features(dataset)

	# Then run models based on what the argument says
	if cl2 == 'log':
		print 'Training logistic regression model...'
		logC = train_logistic(X_train, y_train, X_test, y_test)
	elif cl2 == 'rfc':
		print 'Training random forest classifier...'
		RFC = trainRandomForest(X_train, y_train, X_test, y_test)
	elif cl2 == 'nn':
		print 'Training Neural Net...'
		NN = trainNeuralNet(X_train, y_train, X_test, y_test)
	elif cl2 == 'baseline':
		training_set = [(x,y) for x,y in zip(X_train, y_train)]
		blC = classifiers.Baseline(training_set, class_labels=range(10), debug=True)
		blC.stochastic_grad_descent()
		y_pred = numpy.array([blC.predict(x) for x in X_test])
		# print y_pred
		# print y_test
		print classification_report(y_test, y_pred)
		print "accuracy score =", accuracy_score(y_test, y_pred)
Example no. 22
def get_rsIDs_from_dataset(dataset, num_rsids=0):
    """
    Extracts only the variants with proper rsIDs from the binary fileset,
    dropping '.' placeholder IDs and MT variants.
    :param dataset: The path to the .bed/.bim/.fam files whose '.' rsIDs are to be removed.
    :param num_rsids: The number of rsIDs to extract from the dataset (0 means all).
    :return: The path of the filtered output fileset.
    :rtype: str
    """
    root_path = util.get_root_path(dataset)
    dataset_filename = util.get_filename(dataset)
    dataset_bim = util.get_bed_bim_fam_from_bfile(dataset)['bim']
    temp_extract_file = 'extract_{}.txt'.format(dataset_filename)

    output_file = '{}{}_{}'.format(root_path, dataset_filename, 'RS_ONLY')
    output_lines = set()
    with open(dataset_bim, 'r') as input_file:
        file_lines = input_file.readlines()
        if num_rsids > 0:
            file_lines = file_lines[:num_rsids]
        for line in file_lines:
            # print(line)
            if '.' not in line and not line.startswith('MT'):
                rs_id = re.search('rs[0-9]+', line)
                if rs_id:
                    rs_id = rs_id.group(0).strip()
                    output_lines.add(rs_id + '\n')

    with open(temp_extract_file, 'w+') as output:
        for line in output_lines:
            output.write(line)

    get_rs_ids_command = {
        'bfile': dataset,
        'extract': temp_extract_file,
        'out': output_file
    }
    util.call_plink(
        get_rs_ids_command,
        command_key='Get only rsIDs from input .bim file [ {} ]'.format(
            dataset_filename))
    # os.remove(temp_extract_file)
    return output_file
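util.call_plink is not shown here; presumably it shells out to PLINK with the dict entries as command-line flags. A rough sketch of that idea, where the command construction and the --make-bed output mode are assumptions rather than the project's actual helper:

import subprocess

def call_plink_sketch(options, command_key=None):
    # Hypothetical sketch: turn each key/value pair in the options dict into a
    # "--key value" argument, e.g. {'bfile': d, 'extract': f, 'out': o} becomes
    # plink --bfile d --extract f --out o --make-bed (the --make-bed flag is an
    # assumption about how the filtered fileset is written).
    cmd = ['plink']
    for key, value in options.items():
        cmd += ['--{}'.format(key), str(value)]
    cmd.append('--make-bed')
    subprocess.run(cmd, check=True)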
Example no. 23
def main():
    with open(get_filename()) as file:
        inp = [line.strip() for line in file]

    print(max(get_seats(inp)))
    print(get_missing_seat(inp))
Example no. 24
def process_file(filepath, config, lang, subclasses, classes, depends):
    filename = util.get_filename(filepath)

    if filepath.endswith(".hpp.inc"):
        special(filepath, filename[:-len(".hpp.inc")], "hinc", classes,
                depends)
        return

    if filepath.endswith(".cpp.inc"):
        special(filepath, filename[:-len(".cpp.inc")], "cinc", classes,
                depends)
        return

    fileroot = os.path.splitext(filename)[0]
    current_enum = ""
    current_class = ""
    json = False

    lineno = 1
    for line in preprocess_file(filepath):
        lineno += 1
        fields = line.split()

        if regex.match(r"^ *$", line):
            continue

        # Find any enums in the java class
        #
        match = regex.match(r" *(public|private) enum .*", line)
        if match:
            current_enum = fields[2]

            if fileroot in config.EnumMap and current_enum in config.EnumMap[
                    fileroot]:
                current_enum = config.EnumMap[fileroot][current_enum]

            classes[current_enum].class_name = current_enum
            classes[current_enum].enum = True
            classes[current_enum].elements = []

        if current_enum != "":
            line = regex.sub(r"\([^)]+\)", " ", line)
            fields = line.split()

        match = regex.match(r"^[A-Z0-9_]+[,;]?$", fields[0])
        if current_enum != "" and match:
            line = regex.sub(r"//.*$", "", line)
            line = regex.sub(r"[{;]", "", line)

            names = line.split(",")
            for name in names:
                name = name.strip()
                if name == "":
                    continue

                name = name.split()[0]

                classes[current_enum].elements.append(
                    util.attrdict(element=name))
                classes[current_enum].elements[-1]._N = len(
                    classes[current_enum].elements)

        match = regex.match(r".*;$|^}$|^};$", line)
        if current_enum != "" and match:
            classes[current_enum].elements[-1]._last = True
            current_enum = ""

        # Use the JsonCreator as the definition of a class
        #
        match = regex.match(r".*@JsonCreator.*", line)
        if match:
            json = True

        line = regex.sub(r"[()]", " ", line)
        fields = line.split()

        match = regex.match(r" *public.*", line)
        if json and match:
            current_class = fields[1] if fields[1] != "static" else fields[2]
            classes[current_class].class_name = current_class
            classes[current_class].struct = True
            classes[current_class].fields = []

            if current_class in subclasses:
                classes[current_class].subclass = True
                classes[current_class].super_class = subclasses[
                    current_class].super
                classes[current_class].json_key = subclasses[current_class].key

        match = regex.match(r" *@JsonProperty.*", line)
        if json and match and len(fields) >= 3:
            line = regex.sub(r"^[^@]*", "", line)
            line = regex.sub(r"@Nullable", "", line)
            fields = line.split()
            fields[-1] = regex.sub(r",", "", fields[-1])

            if fields[1][0] == '"':
                type = " ".join(fields[2:-1])
                name = regex.sub('"', "", fields[1])
            else:
                type = " ".join(fields[1:-1])
                name = fields[-1]

            add_field(current_class, fileroot, name, type, config, lang,
                      classes, depends)

        match = regex.match(r" *{ *", line)
        if json and match:
            add_extra(current_class, fileroot, config, lang, classes, depends)

            if len(classes[current_class].fields) == 0:
                classes.pop(current_class)
                json = False
                continue

            json = False

    return classes
Example no. 25
    def mapper(self, _, page):
        with open(os.path.join(data_path, 'links', get_filename(page)),
                  mode='r',
                  encoding='utf-8') as f:
            yield 'lines', sum(1 for _ in f)
Example no. 26
def main():
    with open(get_filename()) as file:
        inp = [group.splitlines() for group in file.read().split("\n\n")]

    print(parse(inp, set.union))
    print(parse(inp, set.intersection))
Example no. 27
def train_model(args: dict, hparams: dict):
    # Code for this function adapted from https://mccormickml.com/2019/07/22/BERT-fine-tuning/

    pos_file = args.pos_file
    neg_file = args.neg_file
    truncation = args.truncation
    n_samples = args.n_samples
    seed_val = hparams["seed_val"]
    device = util.get_device(device_no=args.device_no)
    saves_dir = "saves/"

    Path(saves_dir).mkdir(parents=True, exist_ok=True)
    time = datetime.datetime.now()

    saves_path = os.path.join(saves_dir, util.get_filename(time))
    Path(saves_path).mkdir(parents=True, exist_ok=True)

    log_path = os.path.join(saves_path, "training.log")

    logging.basicConfig(filename=log_path,
                        filemode='w',
                        format='%(name)s - %(levelname)s - %(message)s')
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    logger.info("Pos file: " + str(pos_file))
    logger.info("Neg file: " + str(neg_file))
    logger.info("Parameters: " + str(args))
    logger.info("Truncation: " + truncation)

    # Load the BERT tokenizer.
    logger.info('Loading BERT tokenizer...')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=True)
    max_len = 0

    reviews, labels = util.read_samples_new(filename0=neg_file,
                                            filename1=pos_file,
                                            seed_val=seed_val,
                                            n_samples=n_samples,
                                            sentence_flag=True)
    print(len(reviews), len(labels))

    # Tokenize all of the sentences and map the tokens to their word IDs.
    input_ids = []
    attention_masks = []

    # For every sentence...
    for rev in reviews:
        # `encode_plus` will:
        #   (1) Tokenize the sentence.
        #   (2) Prepend the `[CLS]` token to the start.
        #   (3) Append the `[SEP]` token to the end.
        #   (4) Map tokens to their IDs.
        #   (5) Pad or truncate the sentence to `max_length`
        #   (6) Create attention masks for [PAD] tokens.
        input_id = tokenizer.encode(rev, add_special_tokens=True)
        if len(input_id) > 512:

            if truncation == "tail-only":
                # tail-only truncation
                input_id = [tokenizer.cls_token_id] + input_id[-511:]
            elif truncation == "head-and-tail":
                # head-and-tail truncation
                input_id = ([tokenizer.cls_token_id] + input_id[1:129] +
                            input_id[-382:] + [tokenizer.sep_token_id])
            else:
                # head-only truncation
                input_id = input_id[:511] + [tokenizer.sep_token_id]

            input_ids.append(torch.tensor(input_id).view(1, -1))
            attention_masks.append(
                torch.ones([1, len(input_id)], dtype=torch.long))
        else:
            encoded_dict = tokenizer.encode_plus(
                rev,  # Sentence to encode.
                add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
                max_length=512,  # Pad & truncate all sentences.
                pad_to_max_length=True,
                return_attention_mask=True,  # Construct attn. masks.
                return_tensors='pt',  # Return pytorch tensors.
            )

            # Add the encoded sentence to the list.
            input_ids.append(encoded_dict['input_ids'])

            # And its attention mask (simply differentiates padding from non-padding).
            attention_masks.append(encoded_dict['attention_mask'])

    # Convert the lists into tensors.
    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    labels = torch.tensor(labels)

    # Combine the training inputs into a TensorDataset.
    dataset = TensorDataset(input_ids, attention_masks, labels)

    # Create a 90-10 train-validation split.
    # Calculate the number of samples to include in each set.
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size

    # Divide the dataset by randomly selecting samples.
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    logger.info('{:>5,} training samples'.format(train_size))
    logger.info('{:>5,} validation samples'.format(val_size))

    # The DataLoader needs to know our batch size for training, so we specify it
    # here. For fine-tuning BERT on a specific task, the authors recommend a batch
    # size of 16 or 32.
    batch_size = hparams["batch_size"]

    # Create the DataLoaders for our training and validation sets.
    # We'll take training samples in random order.
    train_dataloader = DataLoader(
        train_dataset,  # The training samples.
        sampler=RandomSampler(train_dataset),  # Select batches randomly
        batch_size=batch_size  # Trains with this batch size.
    )

    # For validation the order doesn't matter, so we'll just read them sequentially.
    validation_dataloader = DataLoader(
        val_dataset,  # The validation samples.
        sampler=SequentialSampler(val_dataset),  # Pull out batches sequentially.
        batch_size=batch_size  # Evaluate with this batch size.
    )

    # Load BertForSequenceClassification, the pretrained BERT model with a single
    # linear classification layer on top.
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",  # Use the 12-layer BERT model, with an uncased vocab.
        num_labels=2,  # The number of output labels--2 for binary classification.
        # You can increase this for multi-class tasks.
        output_attentions=False,  # Whether the model returns attention weights.
        output_hidden_states=False,  # Whether the model returns all hidden-states.
    )

    # Tell pytorch to run this model on the GPU.
    model = model.to(device=device)
    # model.cuda(device=device)

    # Note: AdamW is a class from the huggingface library (as opposed to pytorch)
    # I believe the 'W' stands for 'Weight Decay fix"
    optimizer = AdamW(
        model.parameters(),
        lr=hparams[
            "learning_rate"],  # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=hparams["adam_epsilon"]  # args.adam_epsilon  - default is 1e-8.
    )

    # Number of training epochs. The BERT authors recommend between 2 and 4.
    # We chose to run for 4, but we'll see later that this may be over-fitting the
    # training data.
    epochs = 4

    # Total number of training steps is [number of batches] x [number of epochs].
    # (Note that this is not the same as the number of training samples).
    total_steps = len(train_dataloader) * epochs

    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # Default value in run_glue.py
        num_training_steps=total_steps)

    # This training code is based on the `run_glue.py` script here:
    # https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

    # Set the seed value all over the place to make this reproducible.
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    # We'll store a number of quantities such as training and validation loss,
    # validation accuracy, and timings.
    training_stats = []

    # For each epoch...
    for epoch_i in range(0, epochs):

        # ========================================
        #               Training
        # ========================================

        # Perform one full pass over the training set.
        logger.info("")
        logger.info('======== Epoch {:} / {:} ========'.format(
            epoch_i + 1, epochs))
        logger.info('Training...')

        # Reset the total loss for this epoch.
        total_train_loss = 0

        # Put the model into training mode. Don't be misled--the call to
        # `train` just changes the *mode*, it doesn't *perform* the training.
        # `dropout` and `batchnorm` layers behave differently during training
        # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)
        model.train()

        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):

            # Progress update every 40 batches.
            if step % 40 == 0 and not step == 0:
                # Report progress.
                logger.info('  Batch {:>5,}  of  {:>5,}. '.format(
                    step, len(train_dataloader)))

            # Unpack this training batch from our dataloader.
            #
            # As we unpack the batch, we'll also copy each tensor to the GPU using the
            # `to` method.
            #
            # `batch` contains three pytorch tensors:
            #   [0]: input ids
            #   [1]: attention masks
            #   [2]: labels
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            # Always clear any previously calculated gradients before performing a
            # backward pass. PyTorch doesn't do this automatically because
            # accumulating the gradients is "convenient while training RNNs".
            # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
            model.zero_grad()

            # Perform a forward pass (evaluate the model on this training batch).
            # The documentation for this `model` function is here:
            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
            # It returns different numbers of parameters depending on what arguments
            # are given and what flags are set. For our usage here, it returns
            # the loss (because we provided labels) and the "logits"--the model
            # outputs prior to activation.
            loss, logits = model(b_input_ids,
                                 token_type_ids=None,
                                 attention_mask=b_input_mask,
                                 labels=b_labels)

            # Accumulate the training loss over all of the batches so that we can
            # calculate the average loss at the end. `loss` is a Tensor containing a
            # single value; the `.item()` function just returns the Python value
            # from the tensor.
            total_train_loss += loss.detach().cpu().numpy()

            # Perform a backward pass to calculate the gradients.
            loss.backward()

            # Clip the norm of the gradients to 1.0.
            # This is to help prevent the "exploding gradients" problem.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters and take a step using the computed gradient.
            # The optimizer dictates the "update rule"--how the parameters are
            # modified based on their gradients, the learning rate, etc.
            optimizer.step()

            # Update the learning rate.
            scheduler.step()

        # Calculate the average loss over all of the batches.
        avg_train_loss = total_train_loss / len(train_dataloader)

        logger.info("")
        logger.info("  Average training loss: {0:.2f}".format(avg_train_loss))

        # ========================================
        #               Validation
        # ========================================
        # After the completion of each training epoch, measure our performance on
        # our validation set.

        logger.info("")
        logger.info("Running Validation...")

        # Put the model in evaluation mode--the dropout layers behave differently
        # during evaluation.
        model.eval()

        # Tracking variables
        total_eval_accuracy = 0
        total_eval_loss = 0

        # Evaluate data for one epoch
        for batch in validation_dataloader:

            # Unpack this training batch from our dataloader.
            #
            # As we unpack the batch, we'll also copy each tensor to the GPU using
            # the `to` method.
            #
            # `batch` contains three pytorch tensors:
            #   [0]: input ids
            #   [1]: attention masks
            #   [2]: labels
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            # Tell pytorch not to bother with constructing the compute graph during
            # the forward pass, since this is only needed for backprop (training).
            with torch.no_grad():

                # Forward pass, calculate logit predictions.
                # token_type_ids is the same as the "segment ids", which
                # differentiates sentence 1 and 2 in 2-sentence tasks.
                # The documentation for this `model` function is here:
                # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
                # Get the "logits" output by the model. The "logits" are the output
                # values prior to applying an activation function like the softmax.
                (loss, logits) = model(b_input_ids,
                                       token_type_ids=None,
                                       attention_mask=b_input_mask,
                                       labels=b_labels)

            # Accumulate the validation loss.
            total_eval_loss += loss.detach().cpu().numpy()

            # Move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            # Calculate the accuracy for this batch of test sentences, and
            # accumulate it over all batches.
            total_eval_accuracy += flat_accuracy(logits, label_ids)

        # Report the final accuracy for this validation run.
        avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
        logger.info("  Accuracy: {0:.2f}".format(avg_val_accuracy))

        # Calculate the average loss over all of the batches.
        avg_val_loss = total_eval_loss / len(validation_dataloader)

        logger.info("  Validation Loss: {0:.2f}".format(avg_val_loss))

        # Record all statistics from this epoch.
        training_stats.append({
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
        })

        model_save_path = os.path.join(saves_path,
                                       "model_" + str(epoch_i + 1) + "epochs")
        torch.save(model, model_save_path)

    logger.info("")
    logger.info("Training complete!")
    handlers = logger.handlers[:]
    for handler in handlers:
        handler.close()
        logger.removeHandler(handler)
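The truncation branches above are the core of how over-long reviews are squeezed into BERT's 512-token window. A standalone sketch of the same three strategies (the function name is illustrative; the slicing mirrors what train_model does):

def truncate_ids(input_id, truncation, cls_id, sep_id):
    # input_id is a full list of token ids produced with special tokens, i.e.
    # it already starts with [CLS] and ends with [SEP].
    if len(input_id) <= 512:
        return input_id
    if truncation == "tail-only":
        # keep [CLS] plus the last 511 tokens (ending with the original [SEP])
        return [cls_id] + input_id[-511:]
    if truncation == "head-and-tail":
        # keep [CLS], the first 128 content tokens, the last 382 tokens, and [SEP]
        return [cls_id] + input_id[1:129] + input_id[-382:] + [sep_id]
    # head-only: keep the first 511 tokens and close with [SEP]
    return input_id[:511] + [sep_id]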
Example no. 28
    def load(self):
        levels = self.get_levels()
        if self.level_id > len(levels) - 1:
            self.app.set_state(main.WIN)
            self.app.score_manager.save()
        else:
            self.app.gui_manager.set_state(gui_manager.FADE_IN)

            self.app.game_manager.clear_level()

            self.level_name = levels[self.level_id]
            self.app.gui_manager.update_times(
                self.app.score_manager.run_scores.get(
                    util.get_filename(self.mode, self.level_name), 0),
                self.app.score_manager.get_record(self.mode, self.level_name))

            # map_data = levels.levels[self.level_id]()
            if self.mode == 0:
                directory = 'levels'
            elif self.mode == 1:
                directory = 'survival'

            with open('{}/{}.dat'.format(directory, self.level_name),
                      'rb') as f:
                map_data = pickle.load(f)

            scene = self.app.renderer.scene

            model_name, texture = map_data['terrain']
            self.app.game_manager.terrain = terrain.Terrain(
                self.app, model_name, texture)
            scene.add(self.app.game_manager.terrain.canvas)

            self.app.game_manager.player.spawn(map_data['spawn_pos'])

            if self.mode == 0:
                self.app.game_manager.goal.spawn(map_data['goal_pos'])
            elif self.mode == 1:
                self.app.game_manager.goal.despawn()

            for data in map_data['buildings']:
                b = building.Building(self.app,
                                      building.Building.data[data[0]],
                                      data[1:4], data[4])
                # b = building.Building(self.app, building.Building.data[data[0]], data[1:4], 0)
                self.app.game_manager.game_objects.add(b)
                scene.add(b.canvas)

            for data in map_data['platforms']:
                if data[0] == 0:
                    platform.Hedge(self.app, data[1:4])
                elif data[0] == 1:
                    platform.InvisiblePlatform(self.app, data[1:4])
                elif data[0] == 2:
                    platform.LavaPlatform(self.app, data[1:4])
                elif data[0] == 3:
                    platform.Trampoline(self.app, data[1:4])

            for data in map_data['elevators']:
                e = elevator.Elevator(self.app, data[1:4], data[4])
                self.app.game_manager.game_objects.add(e)  # spawn later
                scene.add(e.canvas)

            for data in map_data['powerups']:
                if data[0] == 0:
                    e = powerup.Fuel(self.app, data[1:4])
                    self.app.game_manager.game_objects.add(e)  # spawn later
                    scene.add(e.canvas)
                elif data[0] == 1:
                    e = powerup.Health(self.app, data[1:4])
                    self.app.game_manager.game_objects.add(e)  # spawn later
                    scene.add(e.canvas)
                elif data[0] == 2:
                    e = powerup.SlowTime(self.app, data[1:4])
                    self.app.game_manager.game_objects.add(e)  # spawn later
                    scene.add(e.canvas)

            for data in map_data['vehicles']:
                if data[0] == 0:
                    v = car.Car(self.app)
                elif data[0] == 1:
                    v = helicopter.Helicopter(self.app)
                v.spawn(data[1:4])

            for data in map_data['enemies']:
                if data[0] == 0:
                    enemy.Turret(self.app, data[1:4])
                elif data[0] == 1:
                    enemy.Bee(self.app, data[1:4])
                elif data[0] == 2:
                    enemy.BowlSpawner(self.app, data[1:4])
                elif data[0] == 3:
                    enemy.InvisibleEnemy(self.app, data[1:4])

            self.app.game_manager.set_state(self.mode)
Example no. 29
    def get_level_name(self):
        return util.get_filename(self.mode, self.level_name)
Example no. 30
def fit_model(data, n_topics, iterations, passes, min_prob, eval_every, n_best,
              min_df, max_df, preserved_words):
    dt = cur_date()
    output_folder = "lda_%stopics_%s" % (n_topics, dt)
    os.makedirs(output_folder, exist_ok=True)
    os.makedirs("%s/separate" % output_folder, exist_ok=True)

    logging.info("creating corpus...")
    dictionary, corpus = make_corpus(list(data.values()), min_df, max_df,
                                     preserved_words, output_folder)
    # generate LDA model
    logging.info("training model...")
    lda = LdaModel(corpus,
                   num_topics=n_topics,
                   id2word=dictionary,
                   iterations=iterations,
                   passes=passes,
                   minimum_probability=min_prob,
                   eval_every=eval_every)
    logging.info("saving model...")
    lda.save('saved/lda_%s_%s.serialized' % (n_topics, dt))
    # print(lda.print_topics(num_topics=n_topics, num_words=4))

    # save all-vs-all pairwise similarities
    logging.info("creating index...")
    index = Similarity('./sim_index',
                       lda[corpus],
                       num_features=n_topics,
                       num_best=n_best + 1)
    paths = list(data.keys())
    logging.info("write all similarities to result file")
    with open('%s/similarities.txt' % output_folder, 'w') as res_file:
        with open('%s/similarities_summary.txt' % output_folder,
                  'w',
                  encoding='utf-8') as res_file_sum:
            for i, similarities in enumerate(index):
                cur_fname = get_filename(paths[i])
                top_similar = [(paths[s[0]], s[1]) for s in similarities
                               if s[0] != i]
                res_file.write('%s: %s\n' %
                               (cur_fname, [(get_filename(p), c)
                                            for (p, c) in top_similar]))

                res_file_sum.write('%s: %s\n' %
                                   (cur_fname, get_title(paths[i])))
                for sim in top_similar:
                    res_file_sum.write(
                        '%s: %s\n' % (get_filename(sim[0]), get_title(sim[0])))
                res_file_sum.write('-' * 100 + '\n')

                # for each doc we make a separate file which contains the list of similar docs
                with open('%s/separate/%s.txt' %
                          (output_folder, cur_fname.split('.')[0]),
                          'w') as sep_res:
                    for sim in top_similar:
                        sep_res.write('%s\n' % get_filename(sim[0]))

    logging.info("save index")
    index.save('saved/lda_index_%s.index' % dt)

    # save topic - words matrix
    with open("%s/topic_words.txt" % output_folder, 'w',
              encoding='utf-8') as f:
        for topic_words in lda.print_topics(lda.num_topics):
            f.write("#%s: %s\n" % (topic_words[0], topic_words[1]))

    # save document - topics matrix
    with open("%s/document_topics.txt" % output_folder, 'w') as f:
        for i, topics in enumerate(lda[corpus]):
            f.write("#%s: %s\n" % (get_filename(paths[i]), topics))

    # save dictionary
    dictionary.save_as_text("%s/dictionary.txt" % output_folder)
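The serialized model and similarity index written above can be reloaded later for querying. A minimal sketch, assuming the gensim artifacts produced by fit_model (the actual file names embed the topic count and run date):

def load_lda_artifacts(model_path, index_path):
    # Hypothetical helper: reload the LdaModel and Similarity index saved by
    # fit_model; the paths depend on the n_topics/date used in that run.
    from gensim.models import LdaModel
    from gensim.similarities import Similarity
    lda = LdaModel.load(model_path)
    index = Similarity.load(index_path)
    return lda, index

Querying the reloaded index with index[lda[bow]] then yields (document id, score) pairs for the n_best most similar documents, the same structure iterated over in fit_model.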
Example no. 31
def get_pair(prefix):
    files = glob.glob(weatherdir +
                      'dd_temp_eng/{0}*.csv'.format(prefix))
    pairs = [get_bs(util.get_filename(x)) for x in files] 
    return pairs