Example no. 1
    def data_to_batches(self):
        """
        Create batches from data stored in an (numpy) array.
        """

        self.num_batches = int((self.data.size - self.history_size) /
                               (self.batch_size * self.seq_length))

        # Raise an error when the data array is too small to form a single batch
        if self.num_batches == 0:
            raise ValueError(
                "ERROR: Cannot create batches ==> data size={}, "
                "batch size={}, seq length={}".format(self.data.size,
                                                      self.batch_size,
                                                      self.seq_length))

        self.data = self.data[:(self.num_batches * self.batch_size *
                                self.seq_length) + self.history_size]

        # Remove the last words in the input chunk and shift the target words
        input = self.data[:-1]
        target = np.copy(self.data)
        target = target[self.history_size:]

        input = np.array(
            chunk(input,
                  (self.num_batches * self.seq_length) + self.history_size - 1,
                  overlap=self.history_size - 1))
        target = np.array(
            chunk(target, (self.num_batches * self.seq_length), overlap=0))

        self.input = chunk(input,
                           self.seq_length + self.history_size - 1,
                           overlap=self.history_size - 1)
        self.target = chunk(target, self.seq_length, overlap=0)
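A minimal sketch of the chunk helper as this example appears to use it, i.e. fixed-size windows over a sequence where consecutive windows share overlap elements (a hypothetical stand-in, not the project's own implementation):

def chunk(seq, size, overlap=0):
    # Hypothetical sketch: split seq into windows of `size` items where
    # consecutive windows share `overlap` items, matching the calls in
    # data_to_batches above. Not the original utility.
    step = size - overlap
    return [seq[i:i + size] for i in range(0, len(seq) - overlap, step)]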
Example no. 2
def fairy_upload():
    data = request.data
    header = data[:(32 + 64)].decode()
    Cached = cache_uploadtoken.get(".".join(["fairy", "security", "checkinfo", header[:32]]))
    if not Cached:
        raise Exceptions.InvalidToken()
    imagehex = header[32:]
    if model.gettexture_hash(imagehex):
        return Response(json.dumps({
            "error": "ForbiddenOperationException",
            "errorMessage": "Quantum repeatability does not hold here."
        }), status=403, mimetype='application/json; charset=utf-8')
    hexed = hashlib.sha256(data[(32 + 64):]).hexdigest()
    if Cached.get("sha256") != hexed:
        return Response(json.dumps({
            "error": "ForbiddenOperationException",
            "errorMessage": "Hash value does not match."
        }), status=403, mimetype='application/json; charset=utf-8')
    size = Cached.get("size")
    height = size.get("height")
    width = size.get("width")

    if len(data) - (32 + 64 + 8) != ((height * width) * 4):
        return Response(json.dumps({"error": "ForbiddenOperationException","errorMessage": "Parsing does not provide sufficient amount of bytes"}), status=403, mimetype='application/json; charset=utf-8')
    if (len(data) - (32 + 64)) % 4 != 0:
        return Response(json.dumps({"error": "ForbiddenOperationException","errorMessage": "No correct encoded image."}), status=403, mimetype='application/json; charset=utf-8')
    # Reject unless both dimensions match one of the allowed moduli
    if not (((height % 32 == 0) or (height % 17 == 0)) and ((width % 64 == 0) or (width % 22 == 0))):
        return Response(json.dumps({"error": "ForbiddenOperationException","errorMessage": "No correct encoded image."}), status=403, mimetype='application/json; charset=utf-8')
        
    if height % 17 == 0:
        height = int(height / 17) * 32
    
    if width % 22 == 0:
        width = int(width / 22) * 32

    image = Image.new('RGBA', (width, height), (255, 255, 255, 255))
    draw = ImageDraw.Draw(image)
    dots = utils.chunk(list(data[(32 + 64):]), 4)[2:]
    chunks = utils.chunk(dots, height)
    for x in range(len(chunks)):
        for y in range(len(chunks[x])):
            draw.point((x, y), fill=(chunks[x][y][1], chunks[x][y][2], chunks[x][y][3], chunks[x][y][0]))
    image.save("".join(["./data/texture/", imagehex, ".png"]), "PNG")
    
    skintype = Cached.get("type")
    
    if skintype == "SKIN" and height % 64 == 0:
        skinmodel = ["STEVE", "ALEX"][sv3d.isSilmSkin(image)]
    texture = model.textures(
        userid=Token.gettoken_strict(Cached.get("accessToken")).get("user"),
        photoname=Cached.get("name"),
        height=height,
        width=width,
        model=skinmodel,
        type=skintype,
        hash=hexed
    )
    texture.save()
    return Response(model.kf_format_textures(texture), mimetype='application/json; charset=utf-8')
Example no. 3
 def parse(self, raw_data):
     chunks = raw_data.split('$')
     self.name = chunks[0]
     self.timeout = utils.chunk(chunks,
                                1,
                                mapping=utils.convert_string_to_integer)
     self.begin_time = utils.chunk(chunks,
                                   2,
                                   mapping=utils.convert_string_to_integer)
     self.end_time = utils.chunk(chunks,
                                 3,
                                 mapping=utils.convert_string_to_integer)
     self.begin_date = utils.chunk(chunks,
                                   4,
                                   mapping=utils.convert_string_to_integer)
     self.end_date = utils.chunk(chunks,
                                 5,
                                 mapping=utils.convert_string_to_integer)
     self.begin_lock_date = utils.chunk(
         chunks, 6, mapping=utils.convert_string_to_integer)
     self.end_lock_date = utils.chunk(
         chunks, 7, mapping=utils.convert_string_to_integer)
     self.day_mask = utils.chunk(chunks,
                                 8,
                                 mapping=utils.convert_string_to_integer)
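Here chunk is used as an indexed field accessor with an optional conversion; a hypothetical sketch of such a utils.chunk overload, assuming that reading, could be:

def chunk(parts, index, mapping=None):
    # Hypothetical sketch: return the index-th field of an already-split
    # record, optionally passed through a conversion function.
    value = parts[index]
    return mapping(value) if mapping is not None else value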
Example no. 4
def ParallelLSTDQ(D,env,w,damping=0.001,ncpus=None):
    """
    D : source of samples (s,a,r,s',a')
    env: environment containing k,phi,gamma
    w : weights for the linear policy evaluation
    damping : keeps the result relatively stable 
    ncpus : the number of cpus to use
    """

    if ncpus:
        nprocess = ncpus
    else:
        nprocess = cpu_count()

    pool = Pool(nprocess)
    indx = chunk(len(D),nprocess)
    results = []
    for (i,j) in indx:
        r = pool.apply_async(dict_loop,(D[i:j],env,w,0.0)) # note that damping needs to be zero here
        results.append(r)
        
    k = len(w)
    A = sp.identity(k,format='csr') * damping
    b = sp_create(k,1,'csr')
    for r in results:
        T,t = r.get()
        A = A + T
        b = b + t

    # close out the pool of workers
    pool.close()
    pool.join()

    w,info = solve(A,b,method="spsolve")
    return A,b,w,info
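In this example chunk(len(D), nprocess) is expected to yield (start, end) index pairs, one slice of the sample set per worker; a hypothetical sketch consistent with that usage:

def chunk(n, k):
    # Hypothetical sketch: split the index range [0, n) into contiguous
    # (start, end) slices so each worker gets D[start:end].
    step = -(-n // k)  # ceiling division
    return [(i, min(i + step, n)) for i in range(0, n, step)]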
Example no. 5
async def osu_toprange(ctx: Context, rankstart: int = 1, rankend: int = 10, *, u: Optional[str] = None):
    if rankstart < 1 or rankend < 1 or rankend > 100 or rankstart > rankend or rankend - rankstart >= 30:
        return await ctx.send('invalid score rank range (max 30 scores, ranks must be between 1-100) ')
    if not u:
        u = get_osuid(ctx)
    if not u:
        return await ctx.send('invalid user')
    topScores = get_top_scores(u, rankend)
    if not topScores:
        return await ctx.send(f'No top scores found for user {u}. Make sure to provide a valid osu username/id.')
    scores = topScores[rankstart - 1: rankend]
    chunkedScores = chunk(scores, 10)
    user = get_user(u)
    first = True
    for scoreChunk in chunkedScores:
        toprangeEmbed = Embed(
            type='rich',
            color=EMBED_COLOR,
            description='\n'.join(map(format_score_inline, scoreChunk))
        )
        if first:
            toprangeEmbed.set_author(
                name=f'Top {rankstart} - {rankend} scores for {user["username"]}',
                url=osu.profile_link(user["user_id"]),
                icon_url=osu.profile_thumb(user["user_id"]),
            )
            first = False
        await ctx.send(embed=toprangeEmbed)
Example no. 6
def roll_list(l, n):
    res = []
    l = list(chunk(l, n-1))
    for i in range(n-1):
        for j in range(n):
            res.append(l[j][(i+j)%(n-1)])
    return res
Example no. 7
    def get_batch_stream(self,batch_size,n_repeat):
        '''
        Generator over the DataSource.
        :param batch_size:
        :param n_repeat: number of times to iterate over the dataset
        :yield: Xs, dict mapping field_name to SparseInput or DenseInput
                Ys, array of labels
        '''
        n_repeat = n_repeat if n_repeat > 0 else sys.maxsize
        for _ in range(n_repeat):
            random.shuffle(self._lines)
            for chunk in utils.chunk(self._lines,batch_size):
                Xs={}
                Ys=[]
                for field_name in CATEGORY_FIELDS:
                    Xs[field_name]=SparseInput(len(chunk),[],[],[])

                for field_name in DENSE_FIELDS:
                    # dense input corresponds to [[example1_value], [example2_value]]
                    Xs[field_name]=[]

                for example_id,line in enumerate(chunk):
                    features,label=self.parse_line(line)
                    Ys.append(label)
                    for field in CATEGORY_FIELDS:
                        '''
                        Handles missing values: in effect, missing fields are set to 0.
                        '''
                        if field in features:
                            Xs[field].add(example_id,features[field],1)

                    for field in DENSE_FIELDS:
                        Xs[field].append([features[field]])

                yield Xs,np.asarray(Ys)
Example no. 8
def wreath_yor_par(alpha, _parts, prefix='/local/hopan/', par=8):
    '''
    alpha: weak partition of 8 into 3 parts?
    _parts: list of partitions of each part of alpha
    Return a dict mapping group element in S_8 -> rep
    The rep actually needs to be a dictionary of tuples (i, j) -> matrix
    where the i, j denote the i, j block in the matrix.
    Ex:
        alpha = (0, 0, 0, 0, 1, 1, 1, 1)
        _parts = [(2,2), (3,1)]
    '''
    #print('Wreath yor with {} processes'.format(par))
    n = sum(alpha)
    _sn = perm2.sn(n, prefix)
    young_sub = young_subgroup_perm(alpha)
    young_sub_set = tup_set(young_sub)
    young_yor = young_subgroup_yor(alpha, _parts, os.path.join(prefix, 'irreps'))
    reps = coset_reps(_sn, young_sub)

    sn_chunks = chunk(_sn, par)
    manager = Manager()
    rep_dict = manager.dict()
    nprocs = []

    for i in range(par):
        perms = sn_chunks[i]
        proc = Process(target=_proc_yor, args=[perms, young_yor, young_sub_set, reps, rep_dict])
        nprocs.append(proc)

    for p in nprocs:
        p.start()
    for p in nprocs:
        p.join()

    return rep_dict
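This call indexes sn_chunks[i] for i in range(par), so chunk(_sn, par) appears to split the permutation list into exactly par pieces; a hypothetical sketch under that assumption:

def chunk(lst, k):
    # Hypothetical sketch: split lst into exactly k nearly equal pieces,
    # so that chunk(lst, k)[i] is the i-th worker's share.
    q, r = divmod(len(lst), k)
    pieces, start = [], 0
    for i in range(k):
        end = start + q + (1 if i < r else 0)
        pieces.append(lst[start:end])
        start = end
    return pieces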
Example no. 9
def _batch_by_source(pairs, *, batch_size=None):
    by_source_length = lambda pair: len(pair[0])
    pairs = sorted(pairs, key=by_source_length)

    return tuple(
        _make_minibatch_pair(pairs)
        for _, batch in groupby(pairs, key=by_source_length)
        for pairs in chunk(batch, batch_size))
Example no. 10
def generate(seed=b'That day',
             n=6,
             max_len=(1000 * 5),
             show_metrics=True) -> Generator[bytes, None, None]:
    start = time()
    tokens: List[bytes] = [m.group(0) for m in list(chunk(seed))[-n:]]
    no_tokens = len(tokens)
    succ = np.array([0 for _ in range(n + 1)], dtype='uint32')
    ps: Dict[bytes, float] = get_chunk_ps()
    chunks: ndarray = np.array(list(ps.keys()))
    chunk_ps: ndarray = np.array(list(ps.values()))

    with ThreadPool(max_workers=NO_CPUS,
                    thread_name_prefix='markov/w') as pool:
        lookup = ChainMap(*[
            task.result() for task in
            [pool.submit(fn=get_nchunks_ps, n=i) for i in range(n, 0, -1)]
        ])
    yield seed

    # token generation
    while no_tokens * AVG_CHUNK_LEN < max_len:
        found = False
        for m in range(n, 0, -1):
            ngram = tuple(tokens[-m:])
            maybe_ps: Optional[Dict[bytes, float]] = lookup.get(ngram, None)
            if maybe_ps is not None and len(maybe_ps) > 1:
                found = True
                succ[m] += 1
                next_chunk: bytes = choice(
                    a=list(maybe_ps.keys()),
                    p=list(maybe_ps.values()),
                )
                yield next_chunk
                tokens.append(next_chunk)
                tokens = tokens[-n:]
                break
        if not found:
            succ[0] += 1
            next_chunk = choice(a=chunks, p=chunk_ps)
            yield next_chunk
            tokens.append(next_chunk)
            tokens = tokens[-n:]
        no_tokens += 1

    if show_metrics:
        # metrics
        log.info('-' * (1 + 6 + 15 + 2))
        log.info('%9s%s' % (' ', 'METRICS'))
        log.info('-' * (1 + 6 + 15 + 2))
        log.info('%-1s %-6s %-15s' % ('#', 'PROB', 'NO EXAMPLES'))
        log.info('%-1s %-6s %-15s' % ('-' * 1, '-' * 6, '-' * 15))
        no_gen_tokens: int = sum(succ)
        for i in range(n, -1, -1):
            log.info('%-1d %-6.4f %-15d' %
                     (i, succ[i] / no_gen_tokens, succ[i]))

    log.debug(f'[finished] generating text (took {time() - start:4.2f}s)')
Example no. 11
    def test(self, iter_no):
        write_log_file(self.log_path, "Start to testing ...")
        test_query_ids = self.text_data.split_ids['test']
        success = {1: 0, 5: 0, 10: 0}
        total_test_scores = []
        test_start = datetime.now()
        for test_chunk in chunk(test_query_ids, 100):
            one_chunk_scores = []
            for i, query_id in enumerate(test_chunk):
                rank_ids, one_row_scores = self.retrieve_rank(
                    query_id, test_chunk, self.text_data, self.code_data)
                one_chunk_scores.append(one_row_scores)
                for k in success.keys():
                    if query_id in rank_ids[:k]:
                        success[k] += 1
            total_test_scores.append(one_chunk_scores)

        write_log_file(
            self.log_path,
            "\n&Testing Iteration {}: for {} queries finished. Time elapsed = {}."
            .format(iter_no, len(test_query_ids),
                    datetime.now() - test_start))

        all_mrr = []
        for i in range(len(total_test_scores)):
            one_chunk_square_score = total_test_scores[i]
            one_chunk_square_score = np.vstack(one_chunk_square_score)
            assert one_chunk_square_score.shape[
                0] == one_chunk_square_score.shape[
                    1], "Every Chunk must be square"
            mrr_array = self.calculate_square_mrr(one_chunk_square_score)
            all_mrr.extend(mrr_array)
        mrr = np.array(all_mrr).mean()
        self.test_iter.append(iter_no)
        self.test_mrr.append(mrr)
        write_log_file(
            self.log_path,
            "&Testing Iteration {}: MRR = &{}&".format(iter_no, mrr))

        for k, v in success.items():
            value = v * 1.0 / len(test_query_ids)
            write_log_file(
                self.log_path, "&Testing Iteration {}: S@{}@ = &{}&".format(
                    iter_no, k, value))
            if k == 1:
                self.test_s1.append(value)
            elif k == 5:
                self.test_s5.append(value)
            elif k == 10:
                self.test_s10.append(value)
            else:
                print('cannot find !')
        write_log_file(
            self.log_path,
            "S@1, S@5, S@10\n{}, {}, {}".format(self.test_s1[-1],
                                                self.test_s5[-1],
                                                self.test_s10[-1]))
Example no. 12
def detector(black_box, block_size=16):
    pt = array('B', 'A' * (block_size * 3))
    ct = black_box(pt)
    chunks = chunk(block_size, ct)
    nxt = chunks.next()
    for block in chunks:
        if len(block) and block == nxt:
            return 'ECB'
        nxt = block
    return 'NOT ECB'
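Note the size-first argument order and the .next() call: chunk here is assumed to be a Python 2 generator over fixed-size ciphertext blocks. A hypothetical sketch:

def chunk(size, seq):
    # Hypothetical sketch: yield consecutive size-byte blocks of seq,
    # matching the chunk(block_size, ct) call above.
    for i in range(0, len(seq), size):
        yield seq[i:i + size]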
Example no. 14
def convert_in_parallel():
    paths = glob.glob('./sounds/mp3/**/*.mp3')
    chunks = chunk(paths, cpu_count() * 2)
    processes = []
    for path_list in chunks:
        process = Process(target=convert_paths, args=(path_list, ))
        process.start()
        processes.append(process)

    for process in processes:
        process.join()
    print "All done!"
Example no. 15
 def remove_repeats(self, lst):
     rs_bits = []
     for bit_lst in chunk(lst, self.t):
         if 1 in bit_lst and 0 in bit_lst:
             raise BECError('Both 0 and 1 found in chunk of repeated bits')
         if 1 in bit_lst:
             rs_bits.append(1)
         elif 0 in bit_lst:
             rs_bits.append(0)
         else:
             rs_bits.append(None)
     return rs_bits
Example no. 16
File: 20.py Project: marcusf/aoc20
def make_pictures(grids, ordering, width):
    grid = {title: grid for (title, grid, _, _) in grids}
    pp = list(utils.chunk(ordering, width))
    joined = [[crop(grid[x]) for x in row] for row in pp]

    picture = []
    for blocks in joined:
        for i in range(len(blocks[0])):
            picture.append(''.join([block[i] for block in blocks]))

    return [flipv(fliph(rot90(picture))), flipv(picture), fliph(picture), \
            flipv(fliph(picture)), rot90(picture),\
            fliph(rot90(picture)), flipv(rot90(picture)),picture]
Example no. 17
async def get_lots_of_contributors(org: str, repos: List[str]):
    results = defaultdict(int)
    with yaspin(text=f"Fetching all {org} contributors..") as spinner:
        tasks = [get_contributors_by_org_repo(org, repo) for repo in repos]
        pieces = chunk(tasks, 20)
        for piece in pieces:
            repos = await asyncio.gather(*piece)
            flat_repos = reduce(lambda x, y: x + y, repos)
            for contributor in flat_repos:
                results[contributor['login']] += contributor['contributions']
            await asyncio.sleep(1)
        spinner.ok('✅ ')

    return results
Example no. 18
    def decode(self, received):
        rs_bits = self.remove_repeats(received)
        rs_code = [0] * (len(rs_bits) // self.c)
        erase_pos = []
        for idx, char_lst in enumerate(chunk(rs_bits, self.c)):
            if None in char_lst:
                erase_pos.append(idx)
            else:
                rs_code[idx] = lst_to_int(char_lst)
        
        raw_none = received.count(None) if isinstance(received, list) else 0
        contract_none = rs_bits.count(None)
        erased_count = len(erase_pos)
#        print("Decode: Raw None = " + str(raw_none) + " / " + str(len(received)) + " \t Contracted None = " + str(contract_none) + " / " + str(len(rs_bits)) + " \t Erased characters = " + str(erased_count) + " / " + str(len(rs_code)) + " = " + "{0:.3f}".format(erased_count/len(rs_code)))
        
        return self._codec.decode(rs_code, erase_pos=erase_pos, only_erasures=True)
Example no. 19
async def osu_leaderboard(ctx: Context, *, modeString: Optional[str] = '0'):
    mode = get_mode(modeString)
    if mode is None:
        return await ctx.send(f'Invalid gamemode {modeString}')
    gid = ctx.guild.id
    userData = backend.read_all_data(backend.USER_DATA)
    guildUsers: List[osu.User] = []
    for uid, uData in userData.items():
        if 'osuid' not in uData or 'guilds' not in uData:
            continue
        registeredGuilds = uData['guilds']
        if gid in registeredGuilds:
            user = get_user(uData['osuid'], mode)
            if user:
                guildUsers.append(user)
            else:
                await ctx.send(
                    f'Profile retrieval failed for user {osu.profile_link(uData["osuid"])} <@{uid}>'
                )
    guildUsers.sort(key=lambda user: (int(user['pp_rank'] or 0) or float('inf'), -float(user['level'] or 0)))
    chunksize = 10
    chunkedGuildUsers = chunk(guildUsers, chunksize)

    first = True
    for cidx, userChunk in enumerate(chunkedGuildUsers):
        leaderboardRows = []
        for i, user in enumerate(userChunk):
            leaderboardRows.append(
                f'**#{(cidx * chunksize) + i + 1}** '
                f'{flag(user["country"])} [{user["username"]}]({osu.profile_link(user["user_id"])}) - '
                f'#{int(user["pp_rank"] or 0):n} | '
                f'{float(user["pp_raw"] or 0):n}pp | '
                f'LVL {float(user["level"] or 0):.2f}'
            )

        leaderboardEmbed = Embed(
            type='rich',
            color=EMBED_COLOR,
            description='\n'.join(leaderboardRows)
        )
        if first:
            leaderboardEmbed.set_author(
                name=f'{osu.MODE_STRING_ENUM[mode]} leaderboard for {ctx.guild.name}',
                icon_url=str(ctx.guild.icon_url) or Embed.Empty,
            )
            first = False
        await ctx.send(embed=leaderboardEmbed)
Example no. 20
def test_images_generator(test_dir, max_target_image_size):
    def preprocess_image(image):
        image, _ = preprocessing.square_padding(image, [])
        image = resize_image_if_neccessary(image, max_target_image_size)
        return np.asarray(image)

    def load_image(image_name):
        image = Image.open(path.join_path(test_dir, image_name))
        return image_name, np.asarray(image), preprocess_image(image)

    image_names = path.list_all_images(test_dir)

    for names in utils.chunk(image_names, 1):
        if not names:
            break

        yield list(zip(*map(load_image, names)))
Example no. 21
def main(args):
    print('args: {}'.format(args))
    print('split dir: {}'.format(args.splitdir))
    print('pkl dir  : {}'.format(args.pkldir))
    print('save dir : {}'.format(args.savedir))
    print('Evaluating irrep: {}'.format(args.alpha))

    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)
    split_files = [
        os.path.join(args.splitdir, f) for f in os.listdir(args.splitdir)
        if args.suffix in f
    ]
    split_chunks = chunk(split_files, args.par)
    parts = ast.literal_eval(args.parts)
    alpha = ast.literal_eval(args.alpha)
    #assert all(sum(parts[i]) == alpha[i] for i in range(len(parts))), 'Invalid partition for alpha!'
    print('About to full transform: {}'.format(split_chunks))
    full_transform(args, alpha, parts, split_chunks)
Example no. 22
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("image_dir", help="path to directory with images")
    parser.add_argument("save_prefix", help="start of saved file names")
    parser.add_argument("--save_dir",
                        help="directory to save outputs",
                        default="pose_data")
    parser.add_argument("--threads",
                        help="number of threads to use",
                        type=int,
                        default=8)
    parser.add_argument("--max_frames",
                        help="maximum frames to compute",
                        type=int,
                        default=None)

    args = parser.parse_args()

    pool = Pool(processes=args.threads)

    def to_full_path(file_name):
        return os.path.join(args.image_dir, file_name)

    paths = list(map(to_full_path, get_ordered_files(args.image_dir)))
    if args.max_frames is not None:
        paths = paths[:args.max_frames]

    poses_and_subsets = pool.map(get_poses, chunk(paths, args.threads))

    print("after pool")

    full_poses = []
    full_subsets = []

    for poses, subsets in poses_and_subsets:
        full_poses.extend(poses)
        full_subsets.extend(subsets)

    to_iter = (("poses", full_poses), ("subsets", full_subsets))

    for name, values in to_iter:
        np.save(os.path.join(args.save_dir, args.save_prefix + name), values)
Example no. 23
    def parseLines(self, lines):
        """
        Parse each line in chunks of nseq + 2 items. Each block contains:
        - nseq sequence lines
        - one line for alignment outcome (skipped)
        - a separator line among blocks (skipped)
        """
        sequences = {}

        for block in utils.chunk(self._skipHeader(lines), self.nseq + 2):
            for line in block[0:self.nseq]:
                key, rawSequence, bases = self._parseLine(line)
                sequence = sequences.get(key, AlignmentSequence(key)).extend(
                    rawSequence, bases)

                if key not in sequences:
                    sequences[key] = sequence

        return sequences
Example no. 24
def write_entities_to_datastore(ds_client, entities):
    logger.debug(
        f"Writing {len(entities)} entities to Cloud Datastore for project beachbirbys..."
    )
    chunks = list(utils.chunk(entities, 500))
    logger.debug(f"Split entities into {len(chunks)} chunks.")
    for chunk in chunks:
        try:
            with ds_client.batch():
                logger.debug("Writing chunk...")
                logger.debug(chunk)
                ds_client.put_multi(chunk)
        except Exception as e:
            logger.exception(e)
            logger.error(chunk)
            logger.error(entities)
            raise
    logger.info(
        f"Wrote {len(entities)} entities to Cloud Datastore for project beachbirbys."
    )
    return
Example no. 25
    def get_batch_stream(self, batch_size, n_repeat=1):
        n_repeat = n_repeat if n_repeat > 0 else sys.maxsize

        for _ in range(n_repeat):
            random.shuffle(self._lines)

            for batch_lines in utils.chunk(self._lines, batch_size):
                Xs = {}
                ys = []

                # ------------- allocate for categorical feature
                for field in CATEGORY_FIELDS:
                    Xs[field] = SparseInput(n_total_examples=len(batch_lines),
                                            example_indices=[],
                                            feature_ids=[],
                                            feature_values=[])

                # ------------- allocate for numeric feature
                for field in DENSE_FIELDS:
                    Xs[field] = []

                # ------------- loop and add
                for example_index, line in enumerate(batch_lines):
                    # Iterating in order guarantees that the non-zero entries inserted
                    # into SparseInput are sorted by ascending example_index
                    current_features, label = self.parse_line(line)
                    ys.append(label)

                    # add categorical feature
                    for field in CATEGORY_FIELDS:
                        if field in current_features:
                            Xs[field].add(example_idx=example_index,
                                          feat_id=current_features[field],
                                          feat_val=1)

                    # add numeric feature
                    for field in DENSE_FIELDS:
                        # wrap into one-element list, since we need to add one row
                        Xs[field].append([current_features[field]])

                yield Xs, np.asarray(ys)
Example no. 26
def ParallelLSTDQ(D, env, w, damping=0.001, ncpus=None):
    """
    D : source of samples (s,a,r,s',a')
    env: environment containing k,phi,gamma
    w : weights for the linear policy evaluation
    damping : keeps the result relatively stable 
    ncpus : the number of cpus to use
    """

    if ncpus:
        nprocess = ncpus
    else:
        nprocess = cpu_count()

    pool = Pool(nprocess)
    indx = chunk(len(D), nprocess)
    results = []
    for (i, j) in indx:
        r = pool.apply_async(
            dict_loop,
            (D[i:j], env, w, 0.0))  # note that damping needs to be zero here
        results.append(r)

    k = len(w)
    A = sp.identity(k, format='csr') * damping
    b = sp_create(k, 1, 'csr')
    for r in results:
        T, t = r.get()
        A = A + T
        b = b + t

    # close out the pool of workers
    pool.close()
    pool.join()

    w, info = solve(A, b, method="spsolve")
    return A, b, w, info
Example no. 27
def ParallelLSTDQRmax(D,env,w,track,damping=0.001,rmax=1.0,ncpus=None):
    """
    D : source of samples (s,a,r,s',a')
    env: environment containing k,phi,gamma
    w : weights for the linear policy evaluation
    track : an object that records what is known
    damping : keeps the result relatively stable (solves some difficulties with oscillation if A is singular)
    rmax : the maximum reward
    ncpus : the number of cpus to use
    """
    if ncpus:
        nprocess = ncpus
    else:
        nprocess = cpu_count()
    
    pool = Pool(nprocess)
    indx = chunk(len(D),nprocess)
    results = []
    for (i,j) in indx:
        r = pool.apply_async(drmax_loop,(D[i:j],env,w,track,0.0,rmax)) # note that damping needs to be zero here
        results.append(r)
        
    k = len(w)
    A = sp.identity(k,format='csr') * damping
    b = sp_create(k,1,'csr')
    for r in results:
        T,t = r.get()
        A = A + T
        b = b + t

    # close out the pool of workers
    pool.close()
    pool.join()

    w,info = solve(A,b,method="spsolve")
    return A,b,w,info
Example no. 28
    """ Apply the DIPCompute transform to DATA and store it. IDX used for multiprocessing. """
    data_subset = Subset(data, data_idx)
    dataloader = DataLoader(data_subset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=0,
                            pin_memory=True)
    gpu_num = idx % (torch.cuda.device_count() - 1) + 1
    comp = DIPCompute(dataloader,
                      save_dir, (3, 32, 32),
                      num_loops=CONST_NUM_LOOPS,
                      iters=iters,
                      input_noise_std=0.03,
                      device=f'cuda:{gpu_num}')
    comp.run_all()


if __name__ == '__main__':
    mp.set_start_method('spawn')
    nproc = 32
    data_idxs = list(range(len(data) // 2, len(data)))  # TODO do other half
    chks = chunk(data_idxs, nproc)
    with mp.Pool(processes=nproc) as pool:
        results = []
        for chk_idx, chk in enumerate(chks):
            #subset_data = Subset(data, chk)
            results.append(pool.apply_async(proc_func, (chk, chk_idx)))
        for res in results:
            res.get()
Example no. 29
 def _fill_table(self, sentences, headers, max_sentence_length):
     total = len(sentences)
     chunk_size = max(total // self.MAX_CHUNK_NUM, self.MIN_CHUNK_SIZE)
     for chunk_id, chunk in enumerate(utils.chunk(sentences, chunk_size)):
         shift = chunk_id * chunk_size
         self.emit(QtCore.SIGNAL(InputDataLookup.SENTENCES_LOADED_EVENT), chunk, shift, total)
Example no. 30
File: qam.py Project: ger-zel/modem
    p = audiobackend.Play(channels=1, queue=q_out)
    x = utils.rand_gen(10240)
    Fcarr = 2000
    Fsampl = 8000
    K = 6
    signal = modulate(x, Fcarr, Fsampl, K)
    y = demodulate(signal, Fcarr, Fsampl, K)
    x = x.tolist()

    # print len(x)
    # print len(y)
    # print x
    # print y

    if bool(utils.contains(x[0:196], y)) == False:
    # if bool(utils.contains(x, y)) == False:
        print "data error"
    else:
        print "data ok"

    s = utils.conv_to_audio(signal)

    p.start()

    for x in utils.chunk(s,size):
        q_out.put(x)
        p.samples_ready()
    p.done()
    p.join()

Example no. 31
def load_tles(filename):
    d = {}
    for a, b, c in utils.chunk(file(filename), 3):
        name = a.strip()
        d[name] = ephem.readtle(a, b, c)
    return d
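Here chunk groups an iterable of lines into consecutive triples (the three lines of each TLE record); a hypothetical sketch of such a grouper:

from itertools import islice

def chunk(iterable, n):
    # Hypothetical sketch: yield consecutive non-overlapping n-tuples
    # drawn from the iterable, dropping any incomplete trailing group.
    it = iter(iterable)
    while True:
        group = tuple(islice(it, n))
        if len(group) < n:
            return
        yield group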
Example no. 32
def main():
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
    usage = "usage: %prog -o -OUTPUT_DIR [options] [input, config [,starting_rules]]..."
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-p",
        "--print_only",
        action="store_false",
        dest="run",
        default=True,
        help="Only shows configuration, does not run it",
    )
    parser.add_option("-o", "--output", dest="output", default=None, help="Destination")
    parser.add_option(
        "-s",
        "--starting_rules",
        action="store_true",
        dest="starting_rules",
        default=False,
        help="Script expects triples of data(input, config, starting_rules) "
        + "instead of doubles (input, config), if used.",
    )
    options, args = parser.parse_args()

    if options.output is None:
        print("No output dir specified!")
        return

    print("artifact directory:", options.output)

    chunk_size = 3 if options.starting_rules else 2

    if not args or len(args) % chunk_size != 0:
        print("Invalid number of arguments!")
        return

    tasks = []
    for i, (input_file, config_file, *rest) in enumerate(chunk(args, chunk_size)):
        print("Task", i)
        task = TaskModel()

        print("\tinput:", input_file)
        task.data_configuration = input_file

        print("\tconfig:", config_file)
        task.params_configuration = config_file

        starting_population = None
        if rest:
            starting_population = rest[0]
            print("\tstarting population:", starting_population)

        task.population_configuration = starting_population

        tasks.append(task)

    if options.run:
        print("Starting run")
        executor = SimulationExecutor()
        runner = Runner()
        for i, x in enumerate(tasks):
            run_func, configuration, population_printer = executor.prepare_simulation(
                runner, i, x.data_configuration, x.params_configuration, x.population_configuration
            )

            result = run_func(configuration)

            collected = False
            while not collected:
                try:
                    _collect_task(x, result, i, configuration, population_printer, executor, options.output)
                except PermissionError:
                    collected = False
                    print("not collected!")
                else:
                    collected = True
Example no. 33
#!/usr/bin/env python3

import sys
import utils
import dasm.assembler

if len(sys.argv) != 2:
    print('usage: asm.py program.asm')
    exit(1)

asm_filename = sys.argv[1]
asm_listing = open(asm_filename,'r').read()

word_list, statements = dasm.assembler.assemble_listing(asm_listing)

#print(word_list)
output_hex = []
for pos,chunk in enumerate(utils.chunk(word_list,8)):
    if len(chunk) < 8:
        chunk.extend([0] * (8 - len(chunk)))
    output_hex.append('{:>4x}: '.format(pos*8) + ' '.join(('{:0>4x}'.format(x) for x in chunk)))

print('\n'.join(output_hex))
output_filename = asm_filename.rsplit('.',1)[0] 
output_file = open(output_filename + '.hex','w')
output_file.write('\n'.join(output_hex) + '\n')

#for instr in instructions:
#    print('{:>4x}: '.format(instr.addr) + ' '.join(("0x{:0>4x}".format(x) for x in instr.assemble())))
Example no. 34
def main(args):
    # seed the random number generator (RNG)
    seed = args.seed
    np.random.seed(seed * 13 // 7)
    if args.mode == 0:
        train(args)
    elif args.mode == 1:
        generate(args)
    elif args.mode == 2:
        # compute test perplexity

        train_data = read_corpus(args.train)

        # Construct vocab
        vocab = Vocab(train_data, int(args.size), int(args.freq_cutoff))

        # Load the trained model
        model = LM_LSTM(hidden_size=args.hidden_dimension,
                        embedding_dim=args.embedding_dimension,
                        output_size=len(vocab.train),
                        n_layers=args.n_layers,
                        is_gru=args.is_gru)
        loss_fn = torch.nn.CrossEntropyLoss(
            reduce=False)  # loss function / optimizer
        if torch.cuda.is_available():
            model = model.cuda()
            loss_fn = loss_fn.cuda()
        model.load_state_dict(
            torch.load(args.model_dir + '/' + args.model_file))
        model.eval()
        dev_data = read_corpus(args.test)
        file = open('sentence_ppl.txt', 'w')
        j = 0
        for sentence in dev_data:
            words = []
            for word in sentence:
                words.append(vocab.train.word2id[word] if word in vocab.train.
                             word2id else vocab.train.word2id['<unk>'])
            words = np.array(words).reshape(-1, 1)
            test_loss, test_ppl = get_val_loss(model, loss_fn, words,
                                               args.seq_len, 1)
            file.write(' '.join(sentence) + " -> " + str(test_ppl))
            j += 1
            if j > 100:
                break
        file.close()
    elif args.mode == 3:
        # compute test perplexity

        train_data = read_corpus(args.train)

        # Construct vocab
        vocab = Vocab(train_data, int(args.size), int(args.freq_cutoff))

        # Load the trained model
        model = LM_LSTM(hidden_size=args.hidden_dimension,
                        embedding_dim=args.embedding_dimension,
                        output_size=len(vocab.train),
                        n_layers=args.n_layers,
                        is_gru=args.is_gru)
        loss_fn = torch.nn.CrossEntropyLoss(
            reduce=False)  # loss function / optimizer
        if torch.cuda.is_available():
            model = model.cuda()
            loss_fn = loss_fn.cuda()
        model.load_state_dict(
            torch.load(args.model_dir + '/' + args.model_file))
        model.eval()
        dev_data = read_corpus(args.test)
        dev_data = convert_to_idx(dev_data, vocab)
        dev = chunk(dev_data, chunk_size=args.chunk_size, is_evaluation=True)
        test_loss, test_ppl = get_val_loss(model, loss_fn, dev, args.seq_len,
                                           args.chunk_size)
        print('Test set perplexity: {}'.format(test_ppl))
Example no. 35
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', '-d', type=str, required=True)
    parser.add_argument('--task', '-t', type=str, required=True)
    parser.add_argument('--output_folder_name', '-o', type=str, default="bert_output")
    parser.add_argument('--bert-model', type=str, default='bert-base-uncased')
    parser.add_argument('--batch-size', '-b', type=int, default=16)
    parser.add_argument('--finetuned_model', '-f', type=str)
    parser.add_argument('--layers', type=int, nargs='+', default=list(range(12)))
    parser.add_argument(
        "--attention_mask_heads", default="", type=str, nargs="*",
        help="[layer]:[head1],[head2]..."
    )
    parser.add_argument(
        '--reverse_head_mask', action='store_true',
        help="Mask all heads except those specified by `--attention-mask-heads`"
    )
    parser.add_argument(
        "--actually_prune", action='store_true',
        help="Really prune (like, for real)"
    )
    args = parser.parse_args()

    # prepare output dir

    output_dir = os.path.join(args.output_folder_name, args.task)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # prepare model

    if args.finetuned_model is None:
        model = BertModel.from_pretrained(args.bert_model)
    else:
        model = BertForSequenceClassification.from_pretrained(args.bert_model)
        state_dict = torch.load(args.finetuned_model)
        model.load_state_dict(state_dict)
        model = model.bert

    model.cuda()
    model.eval()

    # TODO: must adapt dynamically to pruning
    num_attention_heads = model.encoder.layer[0].attention.self.n_heads

    print(f'TASK - {args.task}')
    print(f'layers - {args.layers}')
    print(f'num heads - {num_attention_heads}')

    # Parse pruning descriptor
    to_prune = pruning.parse_head_pruning_descriptors(
        args.attention_mask_heads,
        reverse_descriptors=args.reverse_head_mask,
    )

    print(f'masked heads - {args.attention_mask_heads}')

    # Mask heads
    if args.actually_prune:
        model.prune_heads(to_prune)
    else:
        model.mask_heads(to_prune)

    # hooks
    hooks = []
    for idx in args.layers:
        attn_hook = BundleAveragingHook()
        hook_bert_layer_attn(model, idx, attn_hook)
        hooks.append((f'attn-l{idx}', attn_hook))

    # load data

    data_file = os.path.join(args.data_dir, args.task, "test.tsv")

    df = pd.read_csv(data_file, sep='\t', quoting=3)

    print("total size :", len(df))

    tokenizer = BertTokenizer.from_pretrained(args.bert_model)

    # processing input data

    token_file = open(os.path.join(output_dir, 'tokens.tsv'), 'wt')
    tsv_writer = csv.writer(token_file, delimiter='\t')

    for data in tqdm(list(chunk(list(df.itertuples()), args.batch_size))):

        if "CoLA" in data_file:
            _, _, sentences = list(zip(*data))
        elif "SST-2" in data_file:
            _, _, sentences = list(zip(*data))
        else:
            raise Exception('Unhandled dataset')

        lower_sentences = [ ' '.join(tokenizer.tokenize(s.lower())) for s in sentences ]

        bundle = SingleInputBundle(list(map(str.split, lower_sentences)), tokenizer.vocab)
        bundle.cuda()
        for _, hook in hooks:
            hook.bundle = bundle
        with torch.no_grad():
            model(bundle.token_ids, bundle.segment_ids, bundle.input_mask)

        for padded_sentence in bundle.padded_sentences:
            tsv_writer.writerow(padded_sentence)

    # storing activations

    for name, hook in hooks:
        combined = torch.cat(hook.data)
        print(name, combined.shape)

        torch.save(combined, os.path.join(output_dir, f'{name}.pt'))
        new_shape = combined.size()[:-1] + (num_attention_heads, -1)

        combined = combined.view(*new_shape).permute((1, 0, 2))
        for subhead_idx, subhead_data in enumerate(combined):
            torch.save(subhead_data, os.path.join(output_dir, f'{name}-{subhead_idx}.pt'))
            print(f'\t{name}-{subhead_idx}', subhead_data.shape)
Example no. 36
        # cmap="YlOrRd",
        # cmap="hot",
        cmap="Blues",
        annot=display_values,
        square=True,
    )
    g.set_title(title, fontsize=16)
    g.set_xlabel("Predicted Label", fontsize=14)
    g.set_ylabel("True Label", fontsize=14)
    plt.savefig(save_path, bbox_inches="tight")
    plt.show()


conversations, labels = load_corpus_data(corpus, detail_level)

conversations = chunk(conversations, max_nr_utterances)
labels = chunk(labels, max_nr_utterances)

n_tags = len(get_id2tag(corpus, detail_level=detail_level))

tokenizer = get_tokenizer(rebuild_from_all_words=False)
word2id = tokenizer.word_index

X, y = make_model_readable_data(conversations, labels, tokenizer,
                                max_nr_utterances, max_nr_words)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.1,
                                                    shuffle=True)
Example no. 37
File: qam.py Project: ger-zel/modem
    q_out = Queue.Queue()
    p = audiobackend.Play(channels=1, queue=q_out)
    x = utils.rand_gen(10240)
    Fcarr = 2000
    Fsampl = 8000
    K = 6
    signal = modulate(x, Fcarr, Fsampl, K)
    y = demodulate(signal, Fcarr, Fsampl, K)
    x = x.tolist()

    # print len(x)
    # print len(y)
    # print x
    # print y

    if bool(utils.contains(x[0:196], y)) == False:
        # if bool(utils.contains(x, y)) == False:
        print "data error"
    else:
        print "data ok"

    s = utils.conv_to_audio(signal)

    p.start()

    for x in utils.chunk(s, size):
        q_out.put(x)
        p.samples_ready()
    p.done()
    p.join()
Example no. 38
        if value is not None:
            output_buffers[index].append(value)
            if len(output_buffers[index]) == 3:
                address, X, Y = output_buffers[index]
                output_buffers[index] = []

                if address < size:
                    nodes[address].input.list += [X, Y]
                    nodes[address].input.idle = False
                else:
                    NAT = (X, Y)

                yield address, X, Y, False

        if all(node.input.idle for node in nodes) and NAT is not None:
            address = 0
            X, Y = NAT
            yield address, X, Y, True
            nodes[address].input.list += [X, Y]
            nodes[address].input.idle = False


only_NAT = partial(filter,
                   unpack(lambda address, X, Y, NAT_origin: NAT_origin))
first_repeat_Y = lambda packets: next(p2[2] for p1, p2 in chunk(packets, 2)
                                      if p1[2] == p2[2])

one = compose(
    lambda packets: next(Y for address, X, Y, NAT_origin in packets
                         if address == 255), partial(run, 50), parse)
two = compose(first_repeat_Y, only_NAT, partial(run, 50), parse)