Example #1
    async def list(self, ctx: commands.Context, category=None):
        """
        Show status of CTF challenges.
        Use `>chal ls [category]!` to post to channel.
        """
        ctf = self._get_ctf(ctx)

        public = False
        if category and '!' in category:
            category = category.replace('!','')
            public = True

        if not category:
            if isinstance(ctx.channel, discord.TextChannel):
                category = ctx.channel.name.split(f'{ctf.name}_')[1]
            else:
                category = 'all'

        if category not in ctf.tags:
            category = 'all'

        solved, unsolved = ctf.challenge_summary(category)
        desc = f'{category} challenges\n\n'
        if solved:
            desc += f'# solved\n{solved}\n'
        if unsolved:
            desc += f'# unsolved\n{unsolved}'

        for chunk in chunkify(desc, 1980):
            # emb = discord.Embed(title=f'{category} challenges', description=chunk, colour=4387968)
            # await ctx.message.author.send(embed=emb)
            if public:
                await ctx.send(f'```md\n{chunk}```')
            else:
                await ctx.message.author.send(f'```md\n{chunk}```')
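
All of these examples import a project-specific `chunkify` helper, and its signature differs from project to project. For the simple fixed-size calls such as `chunkify(desc, 1980)` above, a minimal sketch (an assumed implementation, not code from any of the quoted projects) could look like this:

def chunkify(seq, size):
    # Yield successive slices of `seq`, each at most `size` items long.
    # Works for strings, lists and other sliceable sequences.
    for start in range(0, len(seq), size):
        yield seq[start:start + size]
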
Example #2
def find_repetitions(ciphers):
    # Split each ciphertext into 32-character chunks and look for a chunk
    # that occurs more than once; repeated blocks typically indicate
    # ECB-encrypted data.
    for cipher in ciphers:
        chunks = list(chunkify(cipher, 32))
        block, repetitions = Counter(chunks).most_common(1)[0]
        if repetitions > 1:
            return repetitions, block, cipher
Example #3
 def predict(self, x):
     self.eval()
     chunks = chunkify(x, self.max_len, self.min_len)
     logits = self.forward(chunks)
     mean_logits = logits.mean(dim=0)
     index = torch.where(mean_logits > 0)[0]
     result = [self.tags[i] for i in index]
     return result
Example #4
 def __init__(self, file_path, max_len, min_len):
     df = pd.read_csv(file_path)
     df_dict = df.to_dict("records")
     new_df = [
         set_body(row.copy(), chunk)
         for row in df_dict
         for chunk in chunkify(row["body"], max_len, min_len)
     ]
     self.data = pd.DataFrame(new_df).set_index("title")
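
In the two examples above, `chunkify(text, max_len, min_len)` appears to cut a document body into pieces of at most `max_len` units while dropping a trailing fragment shorter than `min_len`; that reading is an assumption based on how the results are fed to the model. A sketch under that assumption:

def chunkify(seq, max_len, min_len):
    # Assumed semantics: fixed-size pieces of at most `max_len` elements,
    # discarding a trailing remainder shorter than `min_len` (but keeping
    # the input as a single chunk if it is short to begin with).
    chunks = [seq[i:i + max_len] for i in range(0, len(seq), max_len)]
    kept = [c for c in chunks if len(c) >= min_len]
    return kept or chunks[:1]
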
Example #5
    async def getsource(self, ctx: Context, command: str):
        cmd = self.bot.all_commands.get(command)
        if cmd is None:
            await ctx.send(f"Could not find `{command}`")
            return

        for chunk in chunkify(textwrap.dedent(inspect.getsource(
                cmd.callback))):
            await ctx.send(wrap(chunk, lang='py'))
Example #6
def do_prediction(intbl,
                  pbms,
                  gene_names,
                  filteropt="p-value",
                  filterval=0.0001,
                  spec_ecutoff=0.4,
                  nonspec_ecutoff=0.35,
                  num_threads=None):
    """
    intbl: preprocessed table
    filteropt: p-value or z-score
    filterval: # TFs for opt z-score and p-val cutoff for p-value
    """

    # intbl: #rowidx,seq,val,diff,t,pbmname,escore_seq
    start_time = time.time()

    # move the comment here for testing
    predfiles = [config.PREDDIR + "/" + pbm
                 for pbm in pbms]  # os.listdir(preddir)
    preds = utils.chunkify(
        predfiles, config.PCOUNT)  # chunks the predfiles for each process

    if filteropt == "p-value":
        filterval = float(filterval)
    else:  #z-score
        filterval = int(filterval)

    # collect the short2long_map -- shared, so only one i/o
    emap = pd.read_csv("%s/index_short_to_long.csv" % (config.ESCORE_DIR),
                       header=0,
                       index_col=0,
                       sep=',',
                       dtype='Int32')  # pd.DataFrame
    emap = np.array(
        emap[emap.columns[0]]) - 1  #emap[emap.columns[0]].to_numpy() - 1

    # --- PARALLEL PART ---
    # need to use manager here
    #shared_ready_sum = mp.Manager().Value('i', 0)
    # prepare all parameters but predlist
    predict_partial = ft.partial(
        predict, **{
            'dataset': intbl,
            'emap': emap,
            'filteropt': filteropt,
            'filterval': filterval,
            'spec_ecutoff': spec_ecutoff,
            'nonspec_ecutoff': nonspec_ecutoff,
            'num_threads': num_threads
        })

    with cc.ProcessPoolExecutor(config.PCOUNT) as executor:
        res = executor.map(predict_partial, preds)

    return postprocess(res, predfiles, gene_names, filteropt, filterval)
Example #7
def init(block, key, format_):
    if format_ == 'x':
        key = [chr(int(k,16)) for k in chunkify(key,2)]
        block = [chr(int(b,16)) for b in chunkify(block,2)]

    key = [ord(k) for k in key]
    keysize=len(key)*8

    if len(block) != 16:
        raise BlockSizeError('Block must be of length 16 bytes')
    if not valid_key(key,keysize):
        raise KeySizeError('Key must be either 128, 192 or 256 bit')

    state = [ord(b) for b in block]
    state = [state[i:i+4] for i in range(0, len(state)-3, 4)]

    nk = keysize//32
    nr = nk+6

    roundkey = key_expand(key, nr, nk)

    return state,roundkey,nr
Example #8
def fetch_from_ensembl_genomes():
    """Queries MySQL servers hosted by Ensembl Genomes

    To connect via Terminal (e.g. to debug), run:
    mysql --user=anonymous --host=mysql-eg-publicsql.ebi.ac.uk --port=4157 -A
    """
    global time_ensembl
    t0 = time_ms()
    logger.info('Entering fetch_from_ensembl_genomes')
    connection = db_connect(host='mysql-eg-publicsql.ebi.ac.uk',
                            user='anonymous',
                            port=4157)
    logger.info('Connected to Ensembl Genomes database')

    cursor = connection.cursor()

    db_map = {}
    org_map = {}

    # Get a list of databases we want to query for karyotype data
    cursor.execute('show databases like "%core_%"')
    for row in cursor.fetchall():
        db = row[0]
        if 'collection' in db:
            continue
        name_slug = db.split('_core')[0].replace('_', '-')
        db_map[db] = name_slug
    db_tuples = [item for item in db_map.items()]

    cursor.close()

    # Take the list of DBs we want to query for karyotype data,
    # split it into 100 smaller lists,
    # then launch a new thread for each of those small new DB lists
    # to divide up the work of querying remote DBs.
    num_threads = 100
    db_tuples_lists = chunkify(db_tuples, num_threads)
    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        for result in pool.map(query_ensembl_karyotype_db, db_tuples_lists):
            for db_tuple in result:
                name_slug, asm_data = db_tuple
                if name_slug in org_map:
                    org_map[name_slug].append(asm_data)
                else:
                    org_map[name_slug] = [asm_data]

    logger.info('before exiting with clause')

    time_ensembl += time_ms() - t0
    return org_map
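
Unlike the fixed-size variant, `chunkify(db_tuples, num_threads)` here splits the work list into a fixed number of roughly equal sublists, one per worker thread. A minimal sketch of that behaviour (assumed, inferred from how the result is passed to pool.map):

import math

def chunkify(items, num_chunks):
    # Split `items` into at most `num_chunks` roughly equal sublists.
    size = max(1, math.ceil(len(items) / num_chunks))
    return [items[i:i + size] for i in range(0, len(items), size)]
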
Example #9
def fetch_from_ucsc():
    """Queries MySQL instances hosted by UCSC Genome Browser

    To connect via Terminal (e.g. to debug), run:
    mysql --user=genome --host=genome-mysql.soe.ucsc.edu -A
    """
    global time_ucsc
    t0 = time_ms()
    logger.info('Entering fetch_from_ucsc')
    connection = db_connect(host='genome-mysql.soe.ucsc.edu', user='genome')
    logger.info('Connected to UCSC database')
    cursor = connection.cursor()

    db_map = {}
    org_map = {}

    cursor.execute('use hgcentral')
    cursor.execute('''
      SELECT name, scientificName FROM dbDb
        WHERE active = 1
    ''')
    rows = cursor.fetchall()

    for row in rows:
        db = row[0]
        # e.g. Homo sapiens -> homo-sapiens
        name_slug = row[1].lower().replace(' ', '-')
        db_map[db] = name_slug

    db_tuples = [item for item in db_map.items()]

    # Take the list of DBs we want to query for cytoBandIdeo data,
    # split it into 30 smaller lists,
    # then launch a new thread for each of those small new DB lists
    # to divide up the work of querying remote DBs.
    num_threads = 30
    db_tuples_lists = chunkify(db_tuples, num_threads)
    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        for result in pool.map(query_ucsc_cytobandideo_db, db_tuples_lists):
            if result is None:
                continue
            # assumed: each worker returns a (name_slug, asm_data) pair,
            # mirroring the Ensembl fetcher above
            name_slug, asm_data = result
            if name_slug in org_map:
                org_map[name_slug].append(asm_data)
            else:
                org_map[name_slug] = [asm_data]

    time_ucsc += time_ms() - t0
    return org_map
Example #10
    def _delete_old_lines(self, should_delete):
        """
        Remove outdated lines from the database.
        :param should_delete: lines to be deleted
        """
        lines_ref = self._firestore.collection(u'lines')

        # split the should_delete into chunks of size 500
        # and then batch delete them
        for chunk in chunkify(should_delete, FIRESTORE_BATCH_MAXIMUM_SIZE):
            batch = self._firestore.batch()

            for line in chunk:
                batch.delete(lines_ref.document(line))
                logger.info(f'deleting outdated line {line}.')
            batch.commit()
Example #11
def do_prediction(intbl,
                  pbms,
                  gene_names,
                  filteropt="p-value",
                  filterval=0.0001,
                  spec_ecutoff=0.4,
                  nonspec_ecutoff=0.35):
    """
    intbl: preprocessed table
    filteropt: p-value or z-score
    filterval: # TFs for opt z-score and p-val cutoff for p-value
    """

    # intbl: #rowidx,seq,val,diff,t,pbmname,escore_seq
    start_time = time.time()

    # move the comment here for testing
    predfiles = [config.PREDDIR + "/" + pbm
                 for pbm in pbms]  # os.listdir(preddir)
    preds = utils.chunkify(
        predfiles, config.PCOUNT)  # chunks the predfiles for each process

    # need to use manager here
    shared_ready_sum = mp.Manager().Value('i', 0)

    pool = mp.Pool(processes=config.PCOUNT)
    if filteropt == "p-value":
        filterval = float(filterval)
    else:  #z-score
        filterval = int(filterval)
    async_pools = [
        pool.apply_async(predict,
                         (preds[i], intbl, shared_ready_sum, filteropt,
                          filterval, spec_ecutoff, nonspec_ecutoff))
        for i in range(0, len(preds))
    ]

    total = len(predfiles)
    while not all([p.ready() for p in async_pools]):
        time.sleep(2)

    res = [p.get() for p in async_pools]
    pool.terminate()

    colnames, datavalues = postprocess(res, gene_names, filteropt, filterval)

    return colnames, datavalues
Example #12
    async def run(self, ctx: Context, *, string: str = ''):
        """Runs python code in all codeblocks in the message."""
        env = {
            'bot': self.bot,
            'ctx': ctx,
            'channel': ctx.channel,
            'author': ctx.author,
            'guild': ctx.guild,
            'message': ctx.message,
            '_': self.last_return,
            **globals()
        }

        for code in re.findall(self._code_re, string):
            output = await self.run_code(code.strip(), env)
            for chunk in chunkify(output, newlines=True, wrapped=True):
                await ctx.send(chunk)
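
This command and the error handler in the next example call yet another variant, `chunkify(output, newlines=True, wrapped=True)`, which has to keep every piece under Discord's roughly 2000-character message limit, preferably breaking at line boundaries. The keyword semantics below are assumptions; a sketch:

def chunkify(text, limit=1990, newlines=True, wrapped=False):
    # Assumed sketch: split `text` into pieces that fit in one Discord
    # message, breaking at newlines when `newlines` is set; `wrapped=True`
    # puts each piece inside a ``` code block (hence the reduced limit).
    lines = text.splitlines(keepends=True) if newlines else [text]
    chunks, current = [], ''
    for line in lines:
        if current and len(current) + len(line) > limit:
            chunks.append(current)
            current = ''
        while len(line) > limit:          # hard-split a single overlong line
            chunks.append(line[:limit])
            line = line[limit:]
        current += line
    if current:
        chunks.append(current)
    return [f'```\n{c}\n```' for c in chunks] if wrapped else chunks
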
Example #13
    async def on_command_error(self, ctx: Context, error: Exception):
        if isinstance(error, commands.CommandInvokeError):
            e = error.original
            if await bot.is_owner(ctx.author):
                tb = traceback.format_exception(type(error), error,
                                                error.__traceback__)
                for chunk in chunkify(tb, newlines=True, wrapped=True):
                    await ctx.send(chunk)
                return

            msg = await ctx.send(
                "We're sorry, something went wrong. Would you like to submit a bug report?"
            )
            if await confirm(bot, msg, ctx.author):
                await ctx.send('Thank you, a bug report has been sent')
                await report_bug(ctx, e)
            else:
                await msg.edit(content="We're sorry, something went wrong")

        elif isinstance(error, commands.CommandNotFound):
            if not ctx.invoked_with:
                return

            cmds = [
                name for name, command in bot.all_commands.items()
                if not command.hidden
            ]
            match = difflib.get_close_matches(ctx.invoked_with, cmds, 1)
            if match:
                await ctx.send(
                    f"Sorry I don't know what `{ctx.invoked_with}` is, did you perhaps mean `{match[0]}`?"
                )
            else:
                await ctx.send(
                    f"Sorry I don't know what `{ctx.invoked_with}` is.")

        elif isinstance(error, commands.UserInputError):
            bot.help_command.context = ctx
            signature = bot.help_command.get_command_signature(ctx.command)
            await ctx.send(error.args[0] + f'\nUsage: `{signature}`')

        elif isinstance(error, commands.CommandError):
            await ctx.send(error.args[0])

        else:
            raise error
Example #14
 def _save(self, lines):
     """
     Save lines scraped from the website.
     :param lines: lines to be saved
     """
     lines_ref = self._firestore.collection(u'lines')
     # split the lines in chunks and batch update them in the database
     for chunk in chunkify(lines, FIRESTORE_BATCH_MAXIMUM_SIZE):
         batch = self._firestore.batch()
         for line in chunk:
             batch.set(lines_ref.document(line.code), {
                 u'code': line.code,
                 u'name': line.name,
                 u'timetable_url': line.url,
                 u'cities': list(line.cities),
                 u'file_hash': line.file_hash
             },
                       merge=True)
             logging.info(f'saving line with code {line.code}')
         batch.commit()
Example #15
def get_tracks(self):
    """Collects track metadata from spotify and uploads it to our database.

    Retreives a list of album ids from the "albums" table in our database. Then,
    we splits this list into chunks and assigns celery tasks to each one. This
    allows our program to distribute (e.g., multi-process) tasks across a set of
    workers rather than needing to completing each chunk and their associated
    tasks in sequential order.

    """

    time = datetime.utcnow()
    time = time.replace(minute=0, second=0, microsecond=0)

    query = f"""
        SELECT DISTINCT album_id
        FROM (
            SELECT album_id, max(year) as year, max(track_count) as track_count
            FROM {config.db_schema}.{config.tables.get('albums')}
            WHERE track_count < 23 and year < 2020
            GROUP BY album_id
        ORDER BY year DESC, track_count DESC) as innerQ
        LIMIT 200000;
    """
    with config.engine.connect() as conn:
        album_ids = pd.read_sql(query, con=conn).album_id.values.tolist()

    # split the album ids list into chunks to create more memory efficient tasks
    album_ids = chunkify(album_ids)

    # create the async tasks
    tasks = group(
        (albums_tracks.s(ids, i) | tracks_audio_features.s() | push_tracks.s())
        for i, ids in enumerate(album_ids)
    )

    tracks_flow = (  # noqa: F841
        ((tasks) | drop_dup_tracks.si() | flow_complete.si(self.name, time))
        .delay()
        .get()
    )
Example #16
def encode(data, format_type='b'):
    index2, index3, index4 = indexes(format_type)
    chunk_size = 6 if format_type == 'x' else 3
    b64 = ''
    chunks = [c for c in chunkify(data, chunk_size)]
    for chunk in chunks:
        byte1 = chunk[0:index2]
        byte2 = chunk[index2:index3]
        byte3 = chunk[index3:index4]
        ch1=ch2=ch3=ch4=''
        ch1, ch2,bb1 = encode_byte(byte1, format_type, 2,3,4)
        
        if byte2:
            ch2,ch3,bb1 = encode_byte(byte2, format_type, 4,15,2,b1=bb1)
            if byte3:
                ch3,ch4,_ = encode_byte(byte3, format_type, 6,63, 0, b1=bb1)
            else:
                ch4 = '='
        else:
            ch3 = '=='
        b64 += ch1+ch2+ch3+ch4
    return b64
Example #17
import aes
import base64
from utils import chunkify, readfile

if __name__ == '__main__':
    # Read the base64-encoded ciphertext, decode it and split it into
    # 16-byte blocks for block-by-block AES decryption.
    data = ''.join(readfile('testdata/7.txt'))
    data = base64.decode(data)
    data = chunkify(data, 16)
    key = 'YELLOW SUBMARINE'
    decrypted = ''
    for d in data: 
        decrypted += aes.decrypt(d, key)
    print(decrypted)


Example #18
def to_byte(data):
    # Convert each hex-encoded line into a byte string, two hex digits at a time.
    for line in data:
        yield ''.join(chr(int(c, 16)) for c in chunkify(line.strip(), 2))
Example #19
import xor_crypto
from utils import chunkify

if __name__ == '__main__':
    data = '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'
    # hex -> byte string
    data = ''.join(chr(int(c,16)) for c in chunkify(data,2))

    key = xor_crypto.find_key(data, 1, 2)
    print(xor_crypto.decrypt(data,key))

Example #20
    async def roll(self, ctx, *args):
        """Rolls dice (supports algebraic notation, such as !roll 3d5+10)"""
        def parseint(x, default=0):
            try:
                return int(x or default)
            except ValueError:
                return x

        repatt = (  # !roll 3d5+10 check these dubs
            r'(?P<dice>\d+(?=[dD]))?'  #       3
            r'([Dd](?=\d))?'  #        d
            r'(?P<sides>\d+)?'  #         5
            r'(?P<mod>\s?[-\+]\s?\d+)?'  #          +10
            r'(?P<comment>.*)'  #              check these dubs
        )

        use_default_roll = True
        dice, sides, mod = (DEFAULT_ROLL_DICE_COUNT, DEFAULT_ROLL_SIDES,
                            DEFAULT_ROLL_MODIFIER)
        footer = 'Syntax: !roll 1000, !roll 3d5+7, !roll 11d9 check em'

        match = None
        if args:
            args = ' '.join(args).strip()
            match = re.match(repatt, args)

            if match:
                grps = match.groupdict()

                # Unpack comment
                comment = grps['comment'].strip()
                if comment:
                    author = ctx.message.author
                    # prefer the guild nickname when there is one
                    name = getattr(author, 'nick', None) or author.name
                    footer = f'{name}: {comment}'

                has_arithmetic = any(
                    grps[key] is not None
                    for key in ('dice', 'sides', 'mod'))
                if has_arithmetic:
                    use_default_roll = False

                    # Unpack arithmetic
                    # Sides is compulsory for a match
                    sides = parseint(grps['sides'], 1)

                    # mod defaults to 0
                    mod = parseint(grps['mod'], 0)

                    # dice should default to 1
                    dice = parseint(grps['dice'], 1)

        # Check if expression is too long (too much math)
        if len(str(dice) + str(sides) + str(mod)) > 20:
            await ctx.send(f"That's just way too much work {BIRB}")
            return

        # Calc output
        # roll the dice, then apply the modifier once
        res = sum(random.randint(1, sides) for _ in range(dice)) + mod

        # Format output
        if is_hot_time():
            # suppose res == 12345; transform to '||12||||34||||5||'
            bigrammed = lambda s: chunkify(s, 2)
            res = ''.join(f'||{pair}||' for pair in bigrammed(str(res)))

        # Format input into algebraic notation or 0-99
        formatted = '{D}d{S}{P}{M}'.format(
            D='' if dice == 1 else dice,
            S=sides or 0,
            P=('+' if mod > 0 else '-') if mod else '',
            M=abs(mod) if mod else '')

        if use_default_roll:
            # Assign to fallback formats
            formatted = '0-99'

        # Build embed and set comment or tip
        emb = Embed(description=f"Rolling {formatted}: **{res}**")
        if footer:
            emb.set_footer(text=footer)

        await ctx.send(embed=emb)
Example #21
            print("sentences_a length:", len(sentences_a))
            _, batch_catchphrase_embedding_a = encoder(**encoded_batch_catchphrase_a)  # [7, 768]
            _, batch_catchphrase_embedding_b = encoder(**encoded_batch_catchphrase_b)  # [13,768]

            _, sentence_embedding_a = encoder(**encoded_sentence_a)  # [1, 768]
            _, sentence_embedding_b = encoder(**encoded_sentence_b)  # [1, 768]
        elif exp=="mean":
            sentence_indices_a = tokenizer(sentences_a, truncation=True, return_tensors="pt", padding='max_length',
                                           max_length=512 * 12)
            sentence_indices_b = tokenizer(sentences_b, truncation=True, return_tensors="pt", padding='max_length',
                                           max_length=512 * 12)

            _, batch_catchphrase_embedding_a = encoder(**encoded_batch_catchphrase_a)  # [7, 768]
            _, batch_catchphrase_embedding_b = encoder(**encoded_batch_catchphrase_b)  # [13,768]

            chunk_indices_a = chunkify(sentence_indices_a)

            chunk_indices_b = chunkify(sentence_indices_b)

            chunk_embeddings_a = encode_chunks(chunk_indices_a, encoder)
            chunk_embeddings_b = encode_chunks(chunk_indices_b, encoder)

            #################### Aggregation ######################
            sentence_embedding_a = torch.mean(chunk_embeddings_a, dim=0).unsqueeze(0)
            sentence_embedding_b = torch.mean(chunk_embeddings_b, dim=0).unsqueeze(0)

            del sentence_indices_a, sentence_indices_b, _
            del chunk_indices_a, chunk_indices_b, chunk_embeddings_a, chunk_embeddings_b

        left_left = torch.cdist(sentence_embedding_a, batch_catchphrase_embedding_a, p=2.0)  # [1, 768]*[7, 768]=[1, 7]
        left_right = torch.cdist(sentence_embedding_a, batch_catchphrase_embedding_b,