def MakeDict(data_pbtxt):
    data_pb = util.ReadData(data_pbtxt)
    rep_dict = {}
    stats_files = {}
    for data in data_pb.data:
        rep_dict[data.name] = Load(data.file_pattern)
        stats_files[data.name] = data.stats_file
    return rep_dict, stats_files
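# A minimal usage sketch, assuming `Load` reads the files matched by each
# entry's file_pattern (the pbtxt path below is illustrative, not a file
# shipped with this code):
#
#   rep_dict, stats_files = MakeDict('reps/data.pbtxt')
#   for name in rep_dict:
#       print(name, stats_files[name])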
def ReadData():
    """Parses claim lines of the form '<patch> @ <left>,<top>: <width>x<height>'."""
    data = []
    for line in util.ReadData(3):
        patch, claim = line.split(" @ ")
        coord, dim = claim.split(": ")
        coordx, coordy = coord.split(",")
        dimx, dimy = dim.split("x")
        data.append(((int(coordx), int(coordy)), (int(dimx), int(dimy)), patch))
    return data
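# For example, the claim line "#1 @ 1,3: 4x4" parses to
# ((1, 3), (4, 4), "#1"): a 4x4 patch whose top-left corner sits 1 from
# the left edge and 3 from the top.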
def GetDataHandles(op, names, hyp_list, verbose=False):
    """Returns a list of data handles.

    This method is the top-level routine for creating data handlers. It takes
    a description of which datasets to load and returns data handlers to
    access them.

    Args:
        op: Operation protocol buffer.
        names: List of lists of data names. The top-level list corresponds to
            the train, validation and test sets. The lower-level lists
            correspond to data modalities.
        hyp_list: List of hyperparameters, one for each modality.
        verbose: If True, prints out details of what is happening.

    Returns:
        A list of DataHandler objects.
    """
    typesize = 4  # Bytes per value (float32).
    data_proto_file = os.path.join(op.data_proto_prefix, op.data_proto)
    dataset_proto = util.ReadData(data_proto_file)
    handlers = []
    if dataset_proto.data_handler == 'deepnet':
        # Size each set on disk so memory can be split proportionally.
        size_list = []
        for name_list in names:
            size = 0
            for name in name_list:
                try:
                    data_proto = next(
                        d for d in dataset_proto.data if d.name == name)
                except StopIteration:
                    print('%s not found in data pbtxt' % name)
                    raise
                datasetsize = data_proto.size
                numdims = np.prod(np.array(data_proto.dimensions))
                size += datasetsize * numdims * typesize
            size_list.append(size)
        total_size = sum(size_list)
        proportions = [float(size) / total_size for size in size_list]
        for i, name_list in enumerate(names):
            if name_list == []:
                handlers.append(None)
            else:
                handlers.append(
                    DataHandler(op, name_list, hyp_list, frac=proportions[i]))
    elif dataset_proto.data_handler == 'navdeep':
        import navdeep_datahandler
        for i, name_list in enumerate(names):
            if name_list == []:
                handlers.append(None)
            else:
                handlers.append(navdeep_datahandler.NavdeepDataHandler(
                    op, dataset_proto, name_list, hyp_list))
    return handlers
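# A minimal usage sketch. The data names and hyperparameter variables below
# are illustrative; the real names come from the data pbtxt referenced by
# the operation proto:
#
#   op = util.ReadOperation('train_op.pbtxt')
#   train, valid, test = GetDataHandles(
#       op, [['train_data', 'train_label'],
#            ['valid_data', 'valid_label'],
#            ['test_data', 'test_label']],
#       [data_hyp, label_hyp])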
def main():
    data_dir = sys.argv[1]
    model_dir = sys.argv[2]
    rep_dir = sys.argv[3]
    gpu_mem = sys.argv[4]
    main_mem = sys.argv[5]
    numsplits = int(sys.argv[6])
    data_pbtxt_file = os.path.join(data_dir, 'joint.pbtxt')
    proto_pbtxt_file = os.path.join(data_dir, 'joint.proto')
    data_pb = util.ReadData(data_pbtxt_file)
    EditPaths(data_pb, data_dir, gpu_mem, main_mem)
    with open(proto_pbtxt_file, 'w') as f:
        text_format.PrintMessage(data_pb, f)
    EditTrainers(data_dir, model_dir, rep_dir, numsplits)
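# Expected invocation, inferred from the sys.argv reads above. The script
# name and the memory-size style are assumptions (the sizes presumably use
# the same format GetBytes parses):
#
#   python setup_joint.py <data_dir> <model_dir> <rep_dir> 2G 30G 10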
def __init__(self, op, data_name_list, hyperparameter_list, frac=1.0):
    """Initializes a DataHandler.

    Args:
        op: Operation protocol buffer.
        data_name_list: List of data names that should be put together.
            (Usually refers to a list of different modalities, e.g.,
            ['data', 'label'] or ['image', 'audio'].)
        hyperparameter_list: List of hyperparameters, one for each modality.
        frac: Fraction of the total memory that this data handler may use.
    """
    filenames = []
    numdim_list = []
    datasetsize = None
    left_window = []
    right_window = []
    stats_files = []
    shift = []
    add_noise = []
    shift_amt_x = []
    shift_amt_y = []
    keys = []
    typesize = 4  # Bytes per value (float32).
    if isinstance(op, str):
        op = util.ReadOperation(op)
    self.verbose = op.verbose
    verbose = self.verbose
    data_proto_file = os.path.join(op.data_proto_prefix, op.data_proto)
    dataset_proto = util.ReadData(data_proto_file)
    seq = False
    is_train = False
    for name, hyp in zip(data_name_list, hyperparameter_list):
        data_proto = next(d for d in dataset_proto.data if d.name == name)
        file_pattern = os.path.join(dataset_proto.prefix,
                                    data_proto.file_pattern)
        filenames.append(sorted(glob.glob(file_pattern)))
        stats_files.append(os.path.join(dataset_proto.prefix,
                                        data_proto.stats_file))
        numdims = np.prod(np.array(data_proto.dimensions))
        if not data_proto.sparse:
            numdims *= data_proto.num_labels
        numdim_list.append(numdims)
        seq = seq or data_proto.seq
        left_window.append(hyp.left_window)
        right_window.append(hyp.right_window)
        add_noise.append(hyp.add_noise)
        shift.append(hyp.shift)
        shift_amt_x.append(hyp.shift_amt_x)
        shift_amt_y.append(hyp.shift_amt_y)
        keys.append(data_proto.key)
        is_train = 'train' in name  # HACK - Fix this!
        if datasetsize is None:
            datasetsize = data_proto.size
        else:
            assert datasetsize == data_proto.size, 'Size of %s is not %d' % (
                name, datasetsize)

    # Add space for padding.
    if seq:
        max_rw = max(right_window)
        max_lw = max(left_window)
        actual_datasetsize = datasetsize
        datasetsize += len(filenames[0]) * (max_rw + max_lw)

    numdims = sum(numdim_list)
    batchsize = op.batchsize
    randomize = op.randomize
    self.get_last_piece = op.get_last_piece

    # Compute the size of each cache.
    total_disk_space = datasetsize * numdims * typesize
    max_gpu_capacity = int(frac * GetBytes(dataset_proto.gpu_memory))
    max_cpu_capacity = int(frac * GetBytes(dataset_proto.main_memory))

    # Each capacity should correspond to an integral number of batches.
    vectorsize_bytes = typesize * numdims
    batchsize_bytes = vectorsize_bytes * batchsize
    max_gpu_capacity = (max_gpu_capacity // batchsize_bytes) * batchsize_bytes
    #max_cpu_capacity = (max_cpu_capacity // batchsize_bytes) * batchsize_bytes

    # Don't need more than the total dataset size.
    gpu_capacity = min(total_disk_space, max_gpu_capacity)
    cpu_capacity = min(total_disk_space, max_cpu_capacity)
    num_gpu_batches = gpu_capacity // batchsize_bytes
    num_cpu_batches = cpu_capacity // batchsize_bytes
    gpu_left_overs = gpu_capacity // vectorsize_bytes - num_gpu_batches * batchsize
    cpu_left_overs = cpu_capacity // vectorsize_bytes - num_cpu_batches * batchsize

    if self.verbose:
        if seq:
            num_valid_gpu_vectors = (
                gpu_capacity // vectorsize_bytes
                - len(filenames[0]) * (max_rw + max_lw))
            print(num_valid_gpu_vectors)
        else:
            print('Batches in GPU memory: %d + leftovers %d' % (
                num_gpu_batches, gpu_left_overs))
            print('Batches in main memory: %d + leftovers %d' % (
                num_cpu_batches, cpu_left_overs))
            print('Batches in disk: %d + leftovers %d' % (
                datasetsize // batchsize, datasetsize % batchsize))

    if seq:
        import sequence_datahandler as seq_dh
        self.disk = seq_dh.SequenceDisk(
            filenames, numdim_list, datasetsize, keys=keys,
            left_window=left_window, right_window=right_window,
            verbose=verbose)
        self.cpu_cache = seq_dh.SequenceCache(
            self.disk, cpu_capacity, numdim_list, typesize=typesize,
            randomize=randomize, left_window=left_window,
            right_window=right_window, verbose=verbose)
        self.gpu_cache = seq_dh.SequenceGPUCache(
            self.cpu_cache, gpu_capacity, numdim_list, typesize=typesize,
            randomize=randomize, left_window=left_window,
            right_window=right_window, verbose=verbose, batchsize=batchsize)
    else:
        self.disk = Disk(filenames, numdim_list, datasetsize, keys=keys,
                         verbose=self.verbose)
        self.cpu_cache = Cache(self.disk, cpu_capacity, numdim_list,
                               typesize=typesize, randomize=randomize,
                               verbose=self.verbose)
        self.gpu_cache = GPUCache(self.cpu_cache, gpu_capacity, numdim_list,
                                  typesize=typesize, randomize=randomize,
                                  verbose=self.verbose, shift=shift,
                                  add_noise=add_noise,
                                  center_only=not is_train,
                                  shift_amt_x=shift_amt_x,
                                  shift_amt_y=shift_amt_y)
    for i, stats_file in enumerate(stats_files):
        if (hyperparameter_list[i].normalize and
            hyperparameter_list[i].activation !=
                deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX):
            self.gpu_cache.SetDataStats(i, stats_file)
    self.batchsize = batchsize
    if seq:
        datasetsize = actual_datasetsize
    self.num_batches = datasetsize // batchsize
    if self.get_last_piece and datasetsize % batchsize > 0:
        self.num_batches += 1
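# The handler stages data through a three-level hierarchy (disk -> CPU
# cache -> GPU cache), each level sized above in whole batches. GetBytes
# is elided from this snippet; a hedged sketch, assuming memory sizes in
# the dataset proto are written like '2G' or '512M' (the exact on-disk
# format is an assumption):
def GetBytes(mem_string):
    """Converts a human-readable size such as '2G' into a byte count."""
    multipliers = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3}
    unit = mem_string[-1].upper()
    if unit in multipliers:
        return int(float(mem_string[:-1]) * multipliers[unit])
    return int(mem_string)  # Already a plain byte count.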
            scores[player] += marble + board[current]
            del board[current]
        else:
            current = (current + 2) % len(board)
            #board = board[:current] + blist([marble,]) + board[current:]
            board.insert(current, marble)
        player = (player + 1) % players
        # Progress indicator; guard against marbles < 1000, where the
        # original modulo would divide by zero.
        if marbles >= 1000 and marble % (marbles // 1000) == 0:
            print(marble / (marbles // 100), "%", marble, current)
    return max(scores)


def part1(players, marbles):
    util.Answer(1, Play(players, marbles))


def part2(players, marbles):
    util.Answer(2, Play(players, marbles))


if __name__ == "__main__":
    #print(Play(9, 25), 32)
    print(Play(13, 7999), 146373)
    players, marbles = util.ReadData(9)[0].split(
        " players; last marble is worth ")
    players, marbles = int(players), int(marbles.split()[0])
    print("Players:", players, "Marbles:", marbles)
    part1(players, marbles)
    part2(players, marbles * 100)
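# A minimal sketch of a faster variant using collections.deque rotations
# (O(1) per move instead of O(n) list.insert). Same rules and the same
# high score; only the player labels are shifted by a constant offset:
from collections import deque

def PlayDeque(players, marbles):
    scores = [0] * players
    board = deque([0])  # The current marble is the rightmost element.
    for marble in range(1, marbles + 1):
        if marble % 23 == 0:
            board.rotate(7)   # Move back 7 marbles counter-clockwise.
            scores[marble % players] += marble + board.pop()
            board.rotate(-1)  # The next marble clockwise becomes current.
        else:
            board.rotate(-1)      # Skip one marble clockwise...
            board.append(marble)  # ...and place the new current marble.
    return max(scores)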
        for y in range(1, 302 - 3):
            power = gridpower(x, y, 3, grid)
            if power > max:
                max = power
                maxcell = (x, y)
    util.Answer(1, maxcell)


def part2(id):
    grid = makegrid(id)
    max = -9999999  # NOTE: shadows the builtin max() within this function.
    maxcell = None
    for cell in range(300, 1, -1):
        for x in range(1, 302 - cell):
            for y in range(1, 302 - cell):
                power = gridpower(x, y, cell, grid)
                if power > max:
                    max = power
                    maxcell = (x, y, cell)
        print(cell, maxcell, max)
    util.Answer(2, maxcell)


if __name__ == "__main__":
    print(powerlevel(122, 79, 57), -5)
    print(powerlevel(217, 196, 39), 0)
    data = int(util.ReadData(11)[0])
    part1(data)
    part2(data)
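# powerlevel and the grid helpers are elided from this snippet. A hedged
# reconstruction of powerlevel from the puzzle's stated rule (rack ID is
# x + 10; the power is the hundreds digit of (rack ID * y + serial) *
# rack ID, minus 5); it satisfies both checks printed above:
def powerlevel(x, y, serial):
    rack = x + 10
    return (rack * y + serial) * rack // 100 % 10 - 5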
import util
import deepnet_pb2
import sys, os
from google.protobuf import text_format

proto1 = sys.argv[1]
proto2 = sys.argv[2]
output_pbtxt = sys.argv[3]

out_dir = '/'.join(output_pbtxt.split('/')[:-1])
if out_dir and not os.path.isdir(out_dir):
    os.makedirs(out_dir)

dataset1 = util.ReadData(proto1)
name1 = dataset1.name
dataset2 = util.ReadData(proto2)
name2 = dataset2.name
dataset1_prefix = dataset1.prefix
dataset2_prefix = dataset2.prefix

# Rebase each dataset's file paths onto the common prefix before merging.
prefix = os.path.commonprefix([dataset1_prefix, dataset2_prefix])
if dataset1_prefix != dataset2_prefix:
    for dataset in [dataset1, dataset2]:
        _prefix = dataset.prefix[len(prefix):]
        for d in dataset.data:
            if d.file_pattern:
                d.file_pattern = os.path.join(_prefix, d.file_pattern)
            if d.stats_file:
                d.stats_file = os.path.join(_prefix, d.stats_file)
dataset1.MergeFrom(dataset2)
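# The snippet ends at the merge. The remaining step is presumably pointing
# the merged proto at the common prefix and writing it out; this completion
# is an assumption, mirroring the text_format.PrintMessage usage elsewhere
# in the repo:
dataset1.prefix = prefix
with open(output_pbtxt, 'w') as f:
    text_format.PrintMessage(dataset1, f)

# Expected invocation (the script name is illustrative):
#   python merge_dataset_pbtxt.py first.pbtxt second.pbtxt merged/joint.pbtxt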
import util


def part1(data):
    x = sorted(data, key=lambda coord: coord[1][0])
    y = sorted(data, key=lambda coord: coord[1][1])
    for coord in x:
        print(coord)
    util.Answer(1, None)


def part2(data):
    util.Answer(2, None)


if __name__ == "__main__":
    data = util.ReadData(6)
    coords = []
    for i, line in enumerate(data):
        x, y = line.strip().split(', ')
        coords.append((i, (int(x), int(y))))
    part1(coords)
    part2(coords)
from typing import List

import util


def part1(inputs: List[int]):
    freq = 0
    for inp in inputs:
        freq += inp
    util.Answer(1, freq)


def part2(inputs: List[int]):
    freq = 0
    visited = {0: 1}
    while visited[freq] == 1:
        for inp in inputs:
            freq += inp
            if freq in visited:
                visited[freq] += 1
                break
            visited[freq] = 1
    util.Answer(2, freq)


if __name__ == "__main__":
    inputs = [int(line) for line in util.ReadData(1)]
    part1(inputs)
    part2(inputs)
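# For example, with the changes [+1, -2, +3, +1] the running frequency is
# 0, 1, -1, 2, 3 on the first pass and 4, 2 on the second, so part 2
# reports 2 as the first frequency reached twice.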
        points.append(PointWithVelocity(x, y, dx, dy))
    return points


def Range(points):
    x = [p.pos.x for p in points]
    y = [p.pos.y for p in points]
    return max(x), min(x), max(y), min(y)


if __name__ == "__main__":
    with open("data/puzzle_10_test.txt") as f:
        testdata = f.read().split('\n')
    testdata = parsedata(testdata)

    data = util.ReadData(10)
    points = parsedata(data)
    seconds = 0
    maxx, minx, maxy, miny = Range(points)
    lastxrange = maxx - minx + 1
    # Step forward while the horizontal spread keeps shrinking; the message
    # appears at the moment the points are most tightly clustered.
    while lastxrange > maxx - minx:
        lastxrange = maxx - minx
        for p in points:
            p.Step()
        seconds += 1
        maxx, minx, maxy, miny = Range(points)
    # The loop overshoots by one step; rewind.
    for p in points:
        p.Step(-1)
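# PointWithVelocity (and any Point helper) are elided from this snippet.
# A hedged sketch matching the p.pos.x / p.Step(-1) usage above; every
# field name beyond `pos` is an assumption:
class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y


class PointWithVelocity:
    def __init__(self, x, y, dx, dy):
        self.pos = Point(x, y)
        self.vel = Point(dx, dy)

    def Step(self, n=1):
        # Advance (or, with n=-1, rewind) one second of motion.
        self.pos.x += n * self.vel.x
        self.pos.y += n * self.vel.y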
        root.children.append(child)
    root.meta = data[offset:offset + metacount]
    offset += metacount
    metasum += sum(root.meta)
    return root, offset, metasum


def part1(data):
    _, _, metasum = ReadNode(data)
    util.Answer(1, metasum)


def part2(data):
    root, _, _ = ReadNode(data)
    util.Answer(2, root.value())


if __name__ == "__main__":
    testdata = "2 3 0 3 10 11 12 1 1 0 1 99 2 1 1 2".split(' ')
    testdata = list(int(v) for v in testdata)
    part1(testdata)
    part2(testdata)

    data = util.ReadData(8)[0].split(' ')
    data = list(int(v) for v in data)
    part1(data)
    part2(data)
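# The Node class is elided from this snippet. A hedged sketch matching the
# usage above (children/meta fields and a value() per the puzzle's part 2
# rule: a leaf's value is the sum of its metadata, otherwise each metadata
# entry is a 1-based child index and the child values are summed):
class Node:
    def __init__(self):
        self.children = []
        self.meta = []

    def value(self):
        if not self.children:
            return sum(self.meta)
        return sum(self.children[m - 1].value()
                   for m in self.meta if 1 <= m <= len(self.children))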
def part2(letters):
    letters = react(letters)  # Start from the reacted string.
    letter = None
    length = None
    for c in string.ascii_lowercase:
        tmp = list(l for l in letters if not same(l, c))
        count = len(react(tmp))
        if letter is None or count < length:
            letter = c
            length = count
    util.Answer(2, (letter, length))


if __name__ == "__main__":
    assert will_react('a', 'A')
    assert will_react('A', 'a')
    assert not will_react('a', 'a')
    assert same('a', 'A')
    assert same('A', 'a')
    assert same('a', 'a')

    data, = util.ReadData(5)
    letters = list(c for c in data)  # Bust the string out into a list.
    part1(letters)
    part2(letters)
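# The puzzle's worked example: "dabAcCaCBAcCcaDA" reacts down to
# "dabCBAcaDA" (length 10), and removing every c/C before reacting leaves
# "daDA", so part 2 would report ('c', 4) for that input.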
        hasTwo = False
        hasThree = False
        for count in counts.values():
            hasTwo |= (count == 2)
            hasThree |= (count == 3)
        two += hasTwo
        three += hasThree
    util.Answer(1, two * three)


def part2(inputs):
    for left in inputs:
        for right in inputs:
            if left == right:
                continue
            common = []
            for i in range(len(left)):
                if left[i] == right[i]:
                    common.append(left[i])
                if len(common) < i:
                    break
            if len(common) == len(left) - 1:
                util.Answer(2, "".join(common))
                return


if __name__ == "__main__":
    inputs = util.ReadData(2)
    part1(inputs)
    part2(inputs)
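# For example, "fghij" and "fguij" differ only at index 2, so the common
# characters "fgij" (one shorter than the inputs) would be the part 2
# answer on the puzzle's sample data.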
            _, guard, _, _ = message.split(' ')
            if guard not in sleep:
                sleep[guard] = {}
        elif message == "falls asleep":
            fell_asleep_time = t
        elif message == "wakes up":
            minutes = int((t - fell_asleep_time).seconds / 60)
            for i in range(minutes):
                sleep_minute = (fell_asleep_time +
                                timedelta(minutes=i)).strftime("%H:%M")
                if sleep_minute not in sleep[guard]:
                    sleep[guard][sleep_minute] = []
                sleep[guard][sleep_minute].append(minutes)
            fell_asleep_time = None
        else:
            assert False

    for guard, sleeptimes in sleep.items():
        t = list(sleeptimes.keys())[0]
        maxsleep = (t, len(sleeptimes[t]))
        totalsleep = 0
        for t, spans in sleeptimes.items():
            totalsleep += len(spans)
            if maxsleep[1] < len(spans):
                maxsleep = (t, len(spans))
        print(guard, maxsleep, totalsleep)


if __name__ == "__main__":
    inputs = FormatData(util.ReadData(4))
    part1(inputs)