Example 1
def MakeDict(data_pbtxt):
    """Maps each dataset name in the pbtxt to its loaded data and stats file path."""
    data_pb = util.ReadData(data_pbtxt)
    rep_dict = {}
    stats_files = {}
    for data in data_pb.data:
        rep_dict[data.name] = Load(data.file_pattern)
        stats_files[data.name] = data.stats_file
    return rep_dict, stats_files
Example 2
def ReadData():
	"""Parses claim lines of the form '#id @ x,y: wxh' into ((x, y), (w, h), id)."""
	data = []
	for line in util.ReadData(3):
		patch, claim = line.split(" @ ")
		coord, dim = claim.split(": ")
		coordx,coordy = coord.split(",")
		dimx, dimy = dim.split("x")
		data.append(((int(coordx), int(coordy)), (int(dimx), int(dimy)), patch))
	return data
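A quick sanity check of the parsing above; the sample line follows the claim format the splits imply ("#id @ x,y: wxh") and is made up for illustration:

line = "#1 @ 1,3: 4x4"
patch, claim = line.split(" @ ")
coord, dim = claim.split(": ")
print(patch, coord, dim)  # -> #1 1,3 4x4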
Example 3
def GetDataHandles(op, names, hyp_list, verbose=False):
  """Returns a list of data handles.

  This method is the top-level routine for creating data handlers. It takes a
  description of which datasets to load and returns data handlers to access
  them.
  Args:
    op: Operation protocol buffer.
    names: list of list of data names. The top level list corresponds to train,
      validation and test sets. The lower-level lists correspond to data
      modalities.
    hyp_list: List of hyperparameters for each modality.
    verbose: If True, will print out details of what is happening.
  Returns:
    A list of DataHandler objects.
  """
  typesize = 4
  data_proto_file = os.path.join(op.data_proto_prefix, op.data_proto)
  dataset_proto = util.ReadData(data_proto_file)
  handlers = []
  if dataset_proto.data_handler == 'deepnet':
    size_list = []
    for name_list in names:
      size = 0
      for name in name_list:
        try:
          data_proto = next(d for d in dataset_proto.data if d.name == name)
        except StopIteration as e:
          print('%s not found in data pbtxt' % name)
          raise
        datasetsize = data_proto.size
        numdims = np.prod(np.array(data_proto.dimensions))
        size += datasetsize * numdims * typesize
      size_list.append(size)
    total_size = sum(size_list)
    proportions = [float(size)/total_size for size in size_list]
    for i, name_list in enumerate(names):
      if not name_list:
        handlers.append(None)
      else:
        handlers.append(DataHandler(op, name_list, hyp_list, frac=proportions[i]))
  elif dataset_proto.data_handler == 'navdeep':
    import navdeep_datahandler
    for i, name_list in enumerate(names):
      if not name_list:
        handlers.append(None)
      else:
        handlers.append(navdeep_datahandler.NavdeepDataHandler(
          op, dataset_proto, name_list, hyp_list))

  return handlers
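A minimal call sketch for GetDataHandles, assuming an operation proto op and a hyperparameter list hyp_list are already loaded; the data names below are placeholders following the docstring's train/validation/test layout:

# Hypothetical usage: one handler per split; an empty split yields None.
train_h, valid_h, test_h = GetDataHandles(
    op,
    [['image_train', 'label_train'],
     ['image_valid', 'label_valid'],
     []],  # no test set in this sketch
    hyp_list, verbose=True)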
Example 4
def main():
  data_dir = sys.argv[1]
  model_dir = sys.argv[2]
  rep_dir = sys.argv[3]
  gpu_mem = sys.argv[4]
  main_mem = sys.argv[5]
  numsplits = int(sys.argv[6])

  data_pbtxt_file = os.path.join(data_dir, 'joint.pbtxt')
  proto_pbtxt_file = os.path.join(data_dir, 'joint.proto')
  data_pb = util.ReadData(data_pbtxt_file)
  
  EditPaths(data_pb, data_dir, gpu_mem, main_mem)
  
  with open(proto_pbtxt_file, 'w') as f:
    text_format.PrintMessage(data_pb, f)
  EditTrainers(data_dir, model_dir, rep_dir, numsplits)
Example 5
  def __init__(self, op, data_name_list, hyperparameter_list, frac=1.0):
    """Initializes a DataHandler.
    Args:
      op: Operation protocol buffer.
      data_name_list: List of data names that should be put together. (Usually
        refers to a list of different modalities, e.g., ['data', 'label'] or
        ['image', 'audio'].)
      hyperparameter_list: List of hyperparameters, one for each modality.
      frac: What fraction of the total memory should this data handler use.
    """
    filenames = []
    numdim_list = []
    datasetsize = None
    left_window = []
    right_window = []
    stats_files = []
    shift = []
    add_noise = []
    shift_amt_x = []
    shift_amt_y = []
    keys = []
    typesize = 4
    if isinstance(op, str):
      op = util.ReadOperation(op)
    self.verbose = op.verbose
    verbose = self.verbose
    data_proto_file = os.path.join(op.data_proto_prefix, op.data_proto)
    dataset_proto = util.ReadData(data_proto_file)
    seq = False
    is_train = False
    for name, hyp in zip(data_name_list, hyperparameter_list):
      data_proto = next(d for d in dataset_proto.data if d.name == name)
      file_pattern = os.path.join(dataset_proto.prefix, data_proto.file_pattern)
      filenames.append(sorted(glob.glob(file_pattern)))
      stats_files.append(os.path.join(dataset_proto.prefix, data_proto.stats_file))
      numdims = np.prod(np.array(data_proto.dimensions))
      if not data_proto.sparse:
        numdims *= data_proto.num_labels
      numdim_list.append(numdims)
      seq = seq or data_proto.seq
      left_window.append(hyp.left_window)
      right_window.append(hyp.right_window)
      add_noise.append(hyp.add_noise)
      shift.append(hyp.shift)
      shift_amt_x.append(hyp.shift_amt_x)
      shift_amt_y.append(hyp.shift_amt_y)
      keys.append(data_proto.key)
      is_train = 'train' in name  # HACK - Fix this!
      if datasetsize is None:
        datasetsize = data_proto.size
      else:
        assert datasetsize == data_proto.size, 'Size of %s is not %d' % (
          name, datasetsize)

    # Add space for padding.
    if seq:
      max_rw = max(right_window)
      max_lw = max(left_window)
      actual_datasetsize = datasetsize
      datasetsize += len(filenames[0]) * (max_rw + max_lw)

    numdims = sum(numdim_list)
    batchsize = op.batchsize
    randomize = op.randomize
    self.get_last_piece = op.get_last_piece
    # Compute size of each cache.
    total_disk_space = datasetsize * numdims * typesize
    max_gpu_capacity = int(frac*GetBytes(dataset_proto.gpu_memory))
    max_cpu_capacity = int(frac*GetBytes(dataset_proto.main_memory))

    # Each capacity should correspond to integral number of batches.
    vectorsize_bytes = typesize * numdims
    batchsize_bytes = vectorsize_bytes * batchsize
    max_gpu_capacity = (max_gpu_capacity // batchsize_bytes) * batchsize_bytes
    #max_cpu_capacity = (max_cpu_capacity // batchsize_bytes) * batchsize_bytes

    # Don't need more than the total dataset size.
    gpu_capacity = min(total_disk_space, max_gpu_capacity)
    cpu_capacity = min(total_disk_space, max_cpu_capacity)
    num_gpu_batches = gpu_capacity // batchsize_bytes
    num_cpu_batches = cpu_capacity // batchsize_bytes

    gpu_left_overs = gpu_capacity // vectorsize_bytes - num_gpu_batches * batchsize
    cpu_left_overs = cpu_capacity // vectorsize_bytes - num_cpu_batches * batchsize
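    # Worked example with illustrative numbers: numdims = 784, typesize = 4
    # and batchsize = 100 give vectorsize_bytes = 3136 and
    # batchsize_bytes = 313600, so a 1 GB GPU budget rounds down to
    # (2**30 // 313600) * 313600 bytes, i.e. 3423 whole batches.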
    
    if self.verbose:
      if seq:
        num_valid_gpu_vectors = (gpu_capacity // vectorsize_bytes) - len(filenames[0]) * (max_rw + max_lw)
        print(num_valid_gpu_vectors)
      else:
        print('Batches in GPU memory: %d + leftovers %d' % (num_gpu_batches, gpu_left_overs))
        print('Batches in main memory: %d + leftovers %d' % (num_cpu_batches, cpu_left_overs))
        print('Batches on disk: %d + leftovers %d' % (datasetsize // batchsize, datasetsize % batchsize))
    
    if seq:
      import sequence_datahandler as seq_dh
      self.disk = seq_dh.SequenceDisk(
        filenames, numdim_list, datasetsize, keys=keys, left_window=left_window,
        right_window=right_window, verbose=verbose)
      self.cpu_cache = seq_dh.SequenceCache(
        self.disk, cpu_capacity, numdim_list, typesize=typesize,
        randomize=randomize, left_window=left_window,
        right_window=right_window, verbose=verbose)
      self.gpu_cache = seq_dh.SequenceGPUCache(
        self.cpu_cache, gpu_capacity, numdim_list, typesize=typesize,
        randomize=randomize, left_window=left_window,
        right_window=right_window, verbose=verbose, batchsize=batchsize)
    else:
      self.disk = Disk(filenames, numdim_list, datasetsize, keys=keys,
                       verbose=self.verbose)
      self.cpu_cache = Cache(self.disk, cpu_capacity, numdim_list,
                             typesize=typesize, randomize=randomize,
                             verbose=self.verbose)
      self.gpu_cache = GPUCache(self.cpu_cache, gpu_capacity, numdim_list,
                                typesize=typesize, randomize=randomize,
                                verbose=self.verbose, shift=shift, add_noise=add_noise,
                                center_only=not is_train, shift_amt_x=shift_amt_x,
                                shift_amt_y=shift_amt_y)
    for i, stats_file in enumerate(stats_files):
      if hyperparameter_list[i].normalize and hyperparameter_list[i].activation != deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX:
        self.gpu_cache.SetDataStats(i, stats_file)
    self.batchsize = batchsize
    if seq:
      datasetsize = actual_datasetsize
    self.num_batches = datasetsize // batchsize
    if self.get_last_piece and datasetsize % batchsize > 0:
      self.num_batches += 1
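A usage sketch for the constructor above; the operation file, modality names, and hyperparameter protos (image_hyp, label_hyp) are hypothetical:

# Hypothetical usage; `op` may be a path string, since the constructor
# calls util.ReadOperation on strings.
handler = DataHandler('ops/train.pbtxt', ['image_train', 'label_train'],
                      [image_hyp, label_hyp], frac=0.5)
print('mini-batches per epoch:', handler.num_batches)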
Example 6
            scores[player] += marble + board[current]
            del board[current]
        else:
            current = (current + 2) % len(board)
            #board = board[:current] + blist([marble,]) + board[current:]
            board.insert(current, marble)
        player = (player + 1) % players
        if marble % max(1, marbles // 1000) == 0:  # guard against marbles < 1000
            print(marble * 100 // marbles, "%", marble, current)
    return max(scores)


def part1(players, marbles):
    util.Answer(1, Play(players, marbles))


def part2(players, marbles):
    util.Answer(2, Play(players, marbles))


if __name__ == "__main__":
    #print(Play(9, 25), 32)
    print(Play(13, 7999), 146373)

    players, marbles = util.ReadData(9)[0].split(
        " players; last marble is worth ")
    players, marbles = int(players), int(marbles.split()[0])
    print("Players:", players, "Marbles:", marbles)
    part1(players, marbles)
    part2(players, marbles * 100)
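The commented-out blist line above hints at the bottleneck: board.insert on a plain list is O(n) per move. A sketch of the usual constant-time alternative built on collections.deque rotation, equivalent to Play under the same scoring rules:

from collections import deque

def PlayDeque(players, marbles):
    # Keep the current marble at the right end of the deque.
    scores = [0] * players
    circle = deque([0])
    for marble in range(1, marbles + 1):
        if marble % 23 == 0:
            circle.rotate(7)   # back up to the marble 7 counter-clockwise
            scores[marble % players] += marble + circle.pop()
            circle.rotate(-1)  # the marble clockwise of the removed one becomes current
        else:
            circle.rotate(-1)
            circle.append(marble)
    return max(scores)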
Example 7
        for y in range(1, 302 - 3):
            power = gridpower(x, y, 3, grid)
            if power > max:
                max = power
                maxcell = (x, y)
    util.Answer(1, maxcell)


def part2(id):
    grid = makegrid(id)
    max = -9999999
    maxcell = None
    for cell in range(300, 1, -1):
        for x in range(1, 302 - cell):
            for y in range(1, 302 - cell):
                power = gridpower(x, y, cell, grid)
                if power > max:
                    max = power
                    maxcell = (x, y, cell)
        print(cell, maxcell, max)
    util.Answer(2, maxcell)


if __name__ == "__main__":
    print(powerlevel(122, 79, 57), -5)
    print(powerlevel(217, 196, 39), 0)

    data = int(util.ReadData(11)[0])
    part1(data)
    part2(data)
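part2 above recomputes every square from scratch, which grows roughly as O(n^5). A hedged sketch of the standard summed-area-table speedup, reusing the powerlevel(x, y, serial) function tested above; make_sat and square_power are names introduced here:

def make_sat(serial, size=300):
    # Cumulative 2-D sums: sat[y][x] holds the total power of the
    # rectangle from (1, 1) to (x, y) inclusive.
    sat = [[0] * (size + 1) for _ in range(size + 1)]
    for y in range(1, size + 1):
        for x in range(1, size + 1):
            sat[y][x] = (powerlevel(x, y, serial)
                         + sat[y - 1][x] + sat[y][x - 1] - sat[y - 1][x - 1])
    return sat

def square_power(sat, x, y, cell):
    # Total power of the cell-by-cell square with top-left corner (x, y),
    # computed from four table lookups.
    x2, y2 = x + cell - 1, y + cell - 1
    return (sat[y2][x2] - sat[y - 1][x2]
            - sat[y2][x - 1] + sat[y - 1][x - 1])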
Example 8
import util
import deepnet_pb2
import sys, os
from google.protobuf import text_format

proto1 = sys.argv[1]
proto2 = sys.argv[2]
output_pbtxt = sys.argv[3]

out_dir = '/'.join(output_pbtxt.split('/')[:-1])
if out_dir and not os.path.isdir(out_dir):
    os.makedirs(out_dir)
dataset1 = util.ReadData(proto1)
name1 = dataset1.name
dataset2 = util.ReadData(proto2)
name2 = dataset2.name

dataset1_prefix = dataset1.prefix
dataset2_prefix = dataset2.prefix
prefix = os.path.commonprefix([dataset1_prefix, dataset2_prefix])

if dataset1_prefix != dataset2_prefix:
    for dataset in [dataset1, dataset2]:
        _prefix = dataset.prefix[len(prefix):]
        for d in dataset.data:
            if d.file_pattern:
                d.file_pattern = os.path.join(_prefix, d.file_pattern)
            if d.stats_file:
                d.stats_file = os.path.join(_prefix, d.stats_file)

dataset1.MergeFrom(dataset2)
# The rewrite above made every file pattern relative to the common
# prefix, so the merged dataset carries that prefix.
dataset1.prefix = prefix

# Write the merged dataset to the output path prepared above.
with open(output_pbtxt, 'w') as f:
    text_format.PrintMessage(dataset1, f)
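One caveat in the prefix handling above: os.path.commonprefix compares strings character by character rather than path component by path component, so it can split inside a directory name:

import os.path

# Purely textual comparison: the result here is '/data/set', leaving
# '1/train' and '2/train' as the per-dataset remainders.
print(os.path.commonprefix(['/data/set1/train', '/data/set2/train']))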
Example 9
import util
		

def part1(data):
	x = sorted(data, key=lambda coord: coord[1][0])
	y = sorted(data, key=lambda coord: coord[1][1])
	for coord in x:
		print(coord)
	util.Answer(1, None)

		
def part2(data):
	util.Answer(2, None)


if __name__ == "__main__":
	data = util.ReadData(6)
	coords = []
	for i, line in enumerate(data):
		x, y = line.strip().split(', ')
		coords.append((i, (int(x),int(y))))
	part1(coords)
	part2(coords)
Example 10
from typing import List
import util


def part1(inputs: List[int]):
    freq = 0
    for inp in inputs:
        freq += inp
    util.Answer(1, freq)


def part2(inputs: List[int]):
    # Cycle through the inputs, accumulating a running frequency, until
    # some value is seen twice.
    freq = 0
    visited = {0: 1}
    while visited[freq] == 1:
        for inp in inputs:
            freq += inp
            if freq in visited:
                visited[freq] += 1
                break
            visited[freq] = 1
    util.Answer(2, freq)


if __name__ == "__main__":
    inputs = [int(line) for line in util.ReadData(1)]
    part1(inputs)
    part2(inputs)
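part2 above re-walks the input list until a running total repeats. An equivalent, more compact variant using itertools; first_repeat is a name introduced here:

from itertools import accumulate, cycle

def first_repeat(inputs: List[int]) -> int:
    # Walk the running sum over the endlessly repeated inputs and return
    # the first frequency seen twice (0 counts as already seen).
    seen = {0}
    for freq in accumulate(cycle(inputs)):
        if freq in seen:
            return freq
        seen.add(freq)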
Example 11
        points.append(PointWithVelocity(x, y, dx, dy))
    return points


def Range(points):
    x = [p.pos.x for p in points]
    y = [p.pos.y for p in points]
    return max(x), min(x), max(y), min(y)


if __name__ == "__main__":
    with open("data/puzzle_10_test.txt") as f:
        testdata = f.read().split('\n')
    testdata = parsedata(testdata)

    data = util.ReadData(10)
    points = parsedata(data)

    seconds = 0
    maxx, minx, maxy, miny = Range(points)
    lastxrange = maxx - minx + 1
    # Step forward while the horizontal spread keeps shrinking; the message
    # appears when the points are most tightly clustered.
    while lastxrange > maxx - minx:
        lastxrange = maxx - minx
        for p in points:
            p.Step()
        seconds += 1
        maxx, minx, maxy, miny = Range(points)

    # The loop overshoots by one step; back up to the tightest frame.
    for p in points:
        p.Step(-1)
Example 12
		root.children.append(child)
	
	root.meta = data[offset:offset+metacount]
	offset += metacount
	metasum += sum(root.meta)
	
	return root, offset, metasum

def part1(data):
	_, _, metasum = ReadNode(data)
	util.Answer(1, metasum)

		
def part2(data):
	root, _, _ = ReadNode(data)
	util.Answer(2, root.value())


if __name__ == "__main__":
	testdata = "2 3 0 3 10 11 12 1 1 0 1 99 2 1 1 2".split(' ')
	testdata = list(int(v) for v in testdata)
	part1(testdata)
	part2(testdata)
	
	data = util.ReadData(8)[0].split(' ')
	data = list(int(v) for v in data)
	part1(data)
	part2(data)
Example 13

def part2(letters):
    letters = react(letters)  # start from the reacted string

    letter = None
    length = None
    for c in string.ascii_lowercase:
        tmp = [l for l in letters if not same(l, c)]
        count = len(react(tmp))
        if letter is None or count < length:
            letter = c
            length = count
    util.Answer(2, (letter, length))


if __name__ == "__main__":
    assert will_react('a', 'A')
    assert will_react('A', 'a')
    assert not will_react('a', 'a')

    assert same('a', 'A')
    assert same('A', 'a')
    assert same('a', 'a')

    data, = util.ReadData(5)
    letters = list(data)  # bust the string out into a list

    part1(letters)
    part2(letters)
Example 14
		hasTwo = False
		hasThree = False
		for count in counts.values():
			hasTwo |= (count == 2)
			hasThree |= (count == 3)

		two += hasTwo
		three += hasThree

	util.Answer(1, two*three)

def part2(inputs):
	for left in inputs:
		for right in inputs:
			if left == right:
				continue
			common = []
			for i in range(len(left)):
				if left[i] == right[i]:
					common.append(left[i])
				if len(common) < i:
					break  # at least two mismatches already; stop early
			if len(common) == len(left) - 1:
				util.Answer(2, "".join(common))
				return

if __name__ == "__main__":
	inputs = util.ReadData(2)
	part1(inputs)
	part2(inputs)
Example 15
            _, guard, _, _ = message.split(' ')
            if guard not in sleep:
                sleep[guard] = {}
        elif message == "falls asleep":
            fell_asleep_time = t
        elif message == "wakes up":
            minutes = (t - fell_asleep_time).seconds // 60
            for i in range(minutes):
                sleep_minute = (fell_asleep_time +
                                timedelta(minutes=i)).strftime("%H:%M")
                if sleep_minute not in sleep[guard]:
                    sleep[guard][sleep_minute] = []
                sleep[guard][sleep_minute].append(minutes)
            fell_asleep_time = None
        else:
            assert False
    for guard, sleeptimes in sleep.items():
        t = list(sleeptimes.keys())[0]
        maxsleep = (t, len(sleeptimes[t]))
        totalsleep = 0
        for t, spans in sleeptimes.items():
            totalsleep += len(spans)
            if maxsleep[1] < len(spans):
                maxsleep = (t, len(spans))
        print(guard, maxsleep, totalsleep)


if __name__ == "__main__":
    inputs = FormatData(util.ReadData(4))
    part1(inputs)