def test_listDirectory_skip(self):
    # With one name excluded through ``skip`` and one through
    # ``skip_suffixes``, only the remaining file should be listed.
    from string import ascii_uppercase

    SNAPSHOT_ID = 'listDirectory_nested'
    SUBDIR = 'subdir'
    FILENAME1 = 'nested.txt'
    FILENAME2 = 'another.txt'
    FILENAME3 = 'another.bak'

    # Only the tool and the context are needed; the site is ignored.
    _site, tool, ctx = self._makeOne(SNAPSHOT_ID)

    # Three files in the same subdirectory: one skipped by name, one kept,
    # one skipped by suffix.
    self._makeFile(tool, SNAPSHOT_ID, FILENAME1, printable_bytes,
                   subdir=SUBDIR)
    self._makeFile(tool, SNAPSHOT_ID, FILENAME2,
                   ascii_uppercase.encode('utf-8'), subdir=SUBDIR)
    self._makeFile(tool, SNAPSHOT_ID, FILENAME3, b'abc', subdir=SUBDIR)

    names = ctx.listDirectory(SUBDIR,
                              skip=(FILENAME1,),
                              skip_suffixes=('.bak',))

    self.assertEqual(len(names), 1)
    # assertIn / assertNotIn report the contents of ``names`` on failure,
    # unlike assertTrue / assertFalse on a pre-evaluated ``in`` expression.
    self.assertNotIn(FILENAME1, names)
    self.assertIn(FILENAME2, names)
    self.assertNotIn(FILENAME3, names)
def test_listDirectory_multiple(self):
    # Two files written to the same subdirectory should both be listed.
    from string import ascii_uppercase

    SNAPSHOT_ID = 'listDirectory_nested'
    SUBDIR = 'subdir'
    FILENAME1 = 'nested.txt'
    FILENAME2 = 'another.txt'

    # Only the tool and the context are needed; the site is ignored.
    _site, tool, ctx = self._makeOne(SNAPSHOT_ID)

    self._makeFile(tool, SNAPSHOT_ID, FILENAME1, printable_bytes,
                   subdir=SUBDIR)
    self._makeFile(tool, SNAPSHOT_ID, FILENAME2,
                   ascii_uppercase.encode('utf-8'), subdir=SUBDIR)

    names = ctx.listDirectory(SUBDIR)

    self.assertEqual(len(names), 2)
    # assertIn reports the contents of ``names`` on failure, unlike
    # assertTrue on a pre-evaluated ``in`` expression.
    self.assertIn(FILENAME1, names)
    self.assertIn(FILENAME2, names)
class Sanitizer(ProcessChunk):
    '''Take a TAQ file and make it fake while preserving structure.

    Each chunk gets its symbol column overwritten with a random (but
    stable per real symbol) fake symbol, and every column listed in
    ``fudge_columns`` overwritten with random zero-padded numbers drawn
    around that column's own mean and standard deviation.
    '''

    # These could be overridden as desired
    fudge_columns = ['Bid_Price', 'Bid_Size', 'Ask_Price', 'Ask_Size']

    # This will preserve the fake symbol across chunks. Being a class
    # attribute that is mutated in place, it is also shared by every
    # instance that does not override it.
    symbol_map = {}

    # Alphabet (as bytes) that fake symbols are drawn from.
    ascii_bytes = ascii_uppercase.encode('ascii')

    def _process_chunks(self, iterator_in):
        '''Yield chunks with changed symbols and fudged values.

        Every chunk from ``iterator_in`` is modified in place and then
        yielded.

        For now, successive calls will result in a dropped chunk.
        '''
        for chunk in iterator_in:
            # XXX a little annoying AND undocumented that split makes
            # things unwriteable. Should double-check.
            chunk.flags.writeable = True
            self.fake_symbol_replace(chunk)
            self.fudge_up(chunk)
            yield chunk

    def fake_symbol_replace(self, chunk, symbol_column='Symbol_root'):
        '''Overwrite the symbol column with a fake symbol and return it.

        The real symbol is read from the first row only, and the same fake
        symbol is written to every row, so this presumably expects one
        symbol per chunk -- TODO confirm against the caller.

        A fake symbol is made only if ``symbol_map`` does not already hold
        one for the real symbol; it consists of distinct random uppercase
        ASCII letters and has the same length as the real symbol.

        NOTE(review): nothing stops two different real symbols from drawing
        the same fake symbol.

        Returns the fake symbol written, or None for an empty chunk (which
        is left untouched).
        '''
        # len() rather than truthiness: array truth values are ambiguous.
        if len(chunk) == 0:
            return None

        real_symbol = chunk[symbol_column][0]
        fake_symbol = self.symbol_map.get(real_symbol)
        if fake_symbol is None:
            # Only draw random letters for symbols not seen before.
            fake_symbol = bytes(sample(self.ascii_bytes, len(real_symbol)))
            self.symbol_map[real_symbol] = fake_symbol

        chunk[symbol_column] = fake_symbol
        return fake_symbol

    def fudge_up(self, chunk):
        '''Replace each fudge column with random values of similar scale.

        For every column in ``fudge_columns`` the existing values are read
        as integers, and new values are drawn from a normal distribution
        with the column's mean and standard deviation. The new values are
        kept between 0 and the largest number that fits the field width,
        then written back zero-padded to that width. No ordering between
        rows is preserved.

        The chunk is modified in place; an empty chunk is left untouched.
        '''
        # len() rather than truthiness: array truth values are ambiguous.
        if len(chunk) == 0:
            return

        int64_max = np.iinfo(np.int64).max
        for col in self.fudge_columns:
            # Note that we don't worry about decimal place here - just
            # treating everything as an integer is fine for this purpose
            data = chunk[col].astype(np.int64)
            mean_val = np.mean(data)
            std_val = np.std(data)
            fake_data = (np.random.standard_normal(len(data)) * std_val +
                         mean_val).astype(np.int64)

            # Field width is taken from the first row's value.
            num_bytes = len(chunk[0][col])

            # Keep values within [0, largest num_bytes-digit number]. An
            # over-long value would otherwise lose its trailing digits in
            # the fixed-width string cast below.
            max_value = min(10 ** num_bytes - 1, int64_max)
            np.clip(fake_data, 0, max_value, out=fake_data)

            fake_bytes = np.char.zfill(
                fake_data.astype('S{}'.format(num_bytes)), num_bytes)

            # this is where the side-effects happen
            chunk[col] = fake_bytes
def test_listDirectory_skip(self):
    # With one name excluded through ``skip`` and one through
    # ``skip_suffixes``, only the remaining file should be listed.
    from string import ascii_uppercase

    SNAPSHOT_ID = 'listDirectory_nested'
    SUBDIR = 'subdir'
    FILENAME1 = 'nested.txt'
    FILENAME2 = 'another.txt'
    FILENAME3 = 'another.bak'

    # Only the tool and the context are needed; the site is ignored.
    _site, tool, ctx = self._makeOne(SNAPSHOT_ID)

    # Three files in the same subdirectory: one skipped by name, one kept,
    # one skipped by suffix.
    self._makeFile(tool, SNAPSHOT_ID, FILENAME1, printable_bytes,
                   subdir=SUBDIR)
    self._makeFile(tool, SNAPSHOT_ID, FILENAME2,
                   ascii_uppercase.encode('utf-8'), subdir=SUBDIR)
    self._makeFile(tool, SNAPSHOT_ID, FILENAME3, b'abc', subdir=SUBDIR)

    names = ctx.listDirectory(SUBDIR,
                              skip=(FILENAME1,),
                              skip_suffixes=('.bak',))

    self.assertEqual(len(names), 1)
    # assertIn / assertNotIn report the contents of ``names`` on failure,
    # unlike assertTrue / assertFalse on a pre-evaluated ``in`` expression.
    self.assertNotIn(FILENAME1, names)
    self.assertIn(FILENAME2, names)
    self.assertNotIn(FILENAME3, names)
if c in ignore: continue if len(l) and c ^ l[-1] == 0x20: l.pop() else: l.append(c) return l advent.setup(2018, 5) fin = advent.get_input(mode='rb') polymer = fin.read().rstrip() trimmed = react_fast(polymer) reacted_len = len(trimmed) advent.print_answer(1, reacted_len) best_reacted_len = reacted_len for l, L in zip(ascii_lowercase.encode(), ascii_uppercase.encode()): reacted_len = len(react_fast(trimmed, {l, L})) if reacted_len < best_reacted_len: best_reacted_len = reacted_len advent.print_answer(2, best_reacted_len)
def isupper(b):
    '''Return True if every item of b is an ASCII uppercase letter byte.

    ``b`` is iterated and each item is tested for containment in the
    bytes ``A``-``Z``. Unlike ``bytes.isupper``, an empty ``b`` gives True,
    because there is no item to fail the check.
    '''
    # Encode the alphabet once instead of once per item of b.
    uppercase = ascii_uppercase.encode()
    return all(x in uppercase for x in b)