def test_listDirectory_skip(self):
        # listDirectory() called with both ``skip`` and ``skip_suffixes``:
        # three files are made in SUBDIR, one is excluded by exact name,
        # one by its suffix, and exactly one is expected to be listed.

        from string import ascii_uppercase

        SNAPSHOT_ID = 'listDirectory_nested'
        SUBDIR = 'subdir'
        FILENAME1 = 'nested.txt'
        FILENAME2 = 'another.txt'
        FILENAME3 = 'another.bak'

        # Only the tool and the context are needed by this test.
        _site, tool, ctx = self._makeOne(SNAPSHOT_ID)
        self._makeFile(tool,
                       SNAPSHOT_ID,
                       FILENAME1,
                       printable_bytes,
                       subdir=SUBDIR)
        self._makeFile(tool,
                       SNAPSHOT_ID,
                       FILENAME2,
                       ascii_uppercase.encode('utf-8'),
                       subdir=SUBDIR)
        self._makeFile(tool, SNAPSHOT_ID, FILENAME3, b'abc', subdir=SUBDIR)

        names = ctx.listDirectory(SUBDIR,
                                  skip=(FILENAME1, ),
                                  skip_suffixes=('.bak', ))

        # FILENAME1 is skipped by name, FILENAME3 by its '.bak' suffix.
        # assertIn / assertNotIn report the offending container on failure.
        self.assertEqual(len(names), 1)
        self.assertNotIn(FILENAME1, names)
        self.assertIn(FILENAME2, names)
        self.assertNotIn(FILENAME3, names)
    def test_listDirectory_multiple(self):
        # listDirectory() on a subdirectory holding two files is expected
        # to report both of them and nothing else.

        from string import ascii_uppercase

        SNAPSHOT_ID = 'listDirectory_nested'
        SUBDIR = 'subdir'
        FILENAME1 = 'nested.txt'
        FILENAME2 = 'another.txt'

        # Only the tool and the context are needed by this test.
        _site, tool, ctx = self._makeOne(SNAPSHOT_ID)
        self._makeFile(tool,
                       SNAPSHOT_ID,
                       FILENAME1,
                       printable_bytes,
                       subdir=SUBDIR)
        self._makeFile(tool,
                       SNAPSHOT_ID,
                       FILENAME2,
                       ascii_uppercase.encode('utf-8'),
                       subdir=SUBDIR)

        names = ctx.listDirectory(SUBDIR)

        # assertIn reports the actual listing on failure, unlike
        # assertTrue(x in names), which only says "False is not true".
        self.assertEqual(len(names), 2)
        self.assertIn(FILENAME1, names)
        self.assertIn(FILENAME2, names)
Exemple #3
0
class Sanitizer(ProcessChunk):
    '''Take a TAQ file and make it fake while preserving structure.

    Each chunk gets its symbol column overwritten with a random
    upper-case stand-in and the columns named in ``fudge_columns``
    overwritten with random values drawn around the original
    mean / standard deviation of that column.
    '''

    # These could be overridden as desired
    fudge_columns = ['Bid_Price', 'Bid_Size', 'Ask_Price', 'Ask_Size']

    # This will preserve the fake symbol across chunks.
    # NOTE: this is a class-level dict, so the mapping is shared by every
    # instance that does not assign its own ``symbol_map``.
    symbol_map = {}
    ascii_bytes = ascii_uppercase.encode('ascii')

    def _process_chunks(self, iterator_in):
        '''Yield each chunk with a fake symbol and fudged values.

        The chunks are modified in place and then yielded.

        Original author's note: for now, successive calls will result in
        a dropped chunk.'''
        for chunk in iterator_in:
            # XXX a little annoying AND undocumented that split makes
            # thing unwriteable. Should double-check.
            chunk.flags.writeable = True
            self.fake_symbol_replace(chunk)
            self.fudge_up(chunk)

            yield chunk

    def fake_symbol_replace(self, chunk, symbol_column='Symbol_root'):
        '''Overwrite the symbol column of chunk with a fake symbol.

        The real symbol is read from the first row of ``symbol_column``.
        A fake symbol of the same length is created the first time a real
        symbol is seen and is reused from ``symbol_map`` afterwards.

        Returns the fake symbol that was written into the chunk.'''
        real_symbol = chunk[symbol_column][0]

        fake_symbol = self.symbol_map.get(real_symbol)
        if fake_symbol is None:
            # Only draw random letters when the symbol is new; sample()
            # picks distinct letters, so symbols longer than the alphabet
            # raise ValueError.
            fake_symbol = bytes(sample(self.ascii_bytes, len(real_symbol)))
            self.symbol_map[real_symbol] = fake_symbol

        # Assigning a scalar broadcasts it over the whole column.
        chunk[symbol_column] = fake_symbol
        return fake_symbol

    def fudge_up(self, chunk):
        '''Replace each fudge column with random values of similar spread.

        For every column in ``fudge_columns`` the values are replaced, in
        place, by normally distributed integers with the mean and standard
        deviation of the original column. The results are clamped to the
        range that fits the field: never negative, and never more digits
        than the width of the column's first entry. They are written back
        as zero-padded byte strings of that width.'''
        int64_max = np.iinfo(np.int64).max

        for col in self.fudge_columns:
            # Note that we don't worry about decimal place here - just treating
            # everything as an integer is fine for this purpose
            data = chunk[col].astype(np.int64)
            mean_val = np.mean(data)
            std_val = np.std(data)
            fake_data = (np.random.standard_normal(len(data)) * std_val +
                         mean_val).astype(np.int64)

            # Field width in bytes, taken from the first row of the column.
            num_bytes = len(chunk[0][col])

            # Keep values non-negative and within the field width; a value
            # with more digits than num_bytes would otherwise be cut short
            # by the fixed-width bytes cast below.
            max_value = min(10 ** num_bytes - 1, int64_max)
            np.clip(fake_data, 0, max_value, out=fake_data)

            fake_bytes = np.char.zfill(
                fake_data.astype('S{}'.format(num_bytes)), num_bytes)

            # this is where the side-effects happen
            chunk[col] = fake_bytes
    def test_listDirectory_multiple(self):
        # listDirectory() on a subdirectory holding two files is expected
        # to report both of them and nothing else.

        from string import ascii_uppercase

        SNAPSHOT_ID = 'listDirectory_nested'
        SUBDIR = 'subdir'
        FILENAME1 = 'nested.txt'
        FILENAME2 = 'another.txt'

        # Only the tool and the context are needed by this test.
        _site, tool, ctx = self._makeOne(SNAPSHOT_ID)
        self._makeFile(tool, SNAPSHOT_ID, FILENAME1, printable_bytes,
                       subdir=SUBDIR)
        self._makeFile(tool, SNAPSHOT_ID, FILENAME2,
                       ascii_uppercase.encode('utf-8'), subdir=SUBDIR)

        names = ctx.listDirectory(SUBDIR)

        # assertIn reports the actual listing on failure, unlike
        # assertTrue(x in names), which only says "False is not true".
        self.assertEqual(len(names), 2)
        self.assertIn(FILENAME1, names)
        self.assertIn(FILENAME2, names)
    def test_listDirectory_skip(self):
        # listDirectory() called with both ``skip`` and ``skip_suffixes``:
        # three files are made in SUBDIR, one is excluded by exact name,
        # one by its suffix, and exactly one is expected to be listed.

        from string import ascii_uppercase

        SNAPSHOT_ID = 'listDirectory_nested'
        SUBDIR = 'subdir'
        FILENAME1 = 'nested.txt'
        FILENAME2 = 'another.txt'
        FILENAME3 = 'another.bak'

        # Only the tool and the context are needed by this test.
        _site, tool, ctx = self._makeOne(SNAPSHOT_ID)
        self._makeFile(tool, SNAPSHOT_ID, FILENAME1, printable_bytes,
                       subdir=SUBDIR)
        self._makeFile(tool, SNAPSHOT_ID, FILENAME2,
                       ascii_uppercase.encode('utf-8'), subdir=SUBDIR)
        self._makeFile(tool, SNAPSHOT_ID, FILENAME3, b'abc', subdir=SUBDIR)

        names = ctx.listDirectory(SUBDIR, skip=(FILENAME1,),
                                  skip_suffixes=('.bak',))

        # FILENAME1 is skipped by name, FILENAME3 by its '.bak' suffix.
        # assertIn / assertNotIn report the offending container on failure.
        self.assertEqual(len(names), 1)
        self.assertNotIn(FILENAME1, names)
        self.assertIn(FILENAME2, names)
        self.assertNotIn(FILENAME3, names)
Exemple #6
0
        if c in ignore:
            continue

        if len(l) and c ^ l[-1] == 0x20:
            l.pop()
        else:
            l.append(c)

    return l


# Script body: read the puzzle input as bytes, react it once for part 1,
# then retry with each letter pair ignored for part 2.
advent.setup(2018, 5)
fin = advent.get_input(mode='rb')

polymer = fin.read().rstrip()
trimmed = react_fast(polymer)
reacted_len = len(trimmed)

advent.print_answer(1, reacted_len)

# Start from the already-reacted polymer and hand react_fast the
# lower/upper byte values of one letter at a time as the set to ignore.
# The shortest result wins; the part 1 length is the starting bound.
letter_pairs = zip(ascii_lowercase.encode(), ascii_uppercase.encode())
best_reacted_len = min(
    reacted_len,
    *(len(react_fast(trimmed, {lower, upper}))
      for lower, upper in letter_pairs)
)

advent.print_answer(2, best_reacted_len)
Exemple #7
0
def isupper(b):
    """Return True if every element of *b* is an ASCII upper-case letter.

    *b* is iterated element by element (for ``bytes`` that means integer
    byte values) and each element is tested for membership in the encoded
    ``ascii_uppercase`` alphabet. An empty *b* yields True, and any
    non-letter such as a digit yields False.
    """
    # Encode the alphabet once instead of once per element.
    uppercase = ascii_uppercase.encode()
    return all(x in uppercase for x in b)