Example #1
    def __init__(self, image_repository, path_columns, savefile,
                 registrator=None, n_processes=mp.cpu_count(), debug=False):

        self.debug = debug
        print('initializing analysis...',
              '\timage repository:\t{}'.format(image_repository),
              '\tpath columns:\t{}'.format(path_columns),
              '\tsavefile:\t{}'.format(savefile),
              '\tprocesses:\t{}'.format(n_processes),
              '\tmeasurements:\t{}'.format(list(MEASUREMENTS.keys())),
              '\tdenoising methods:\t{}'.format(list(METHODS.keys())), sep='\n')

        self.methods = METHODS.copy()
        self.measurements = MEASUREMENTS.copy()

        self.savefile = savefile
        self.image_repository = image_repository
        self.path_columns = path_columns
        self.pool = mp.Pool(n_processes)
        self.registrator = (registrator if isinstance(registrator, Registrator)
                            else Registrator(verbose=debug, graphic=debug))
        self.denoising = None

        # make save dir if it does not exist
        save_path = os.path.dirname(self.savefile)
        if not os.path.isdir(save_path):
            os.makedirs(save_path)

        print('done!\n')
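
Example #1 defaults n_processes to mp.cpu_count() and keeps the resulting mp.Pool on the instance for later use. A minimal, self-contained sketch of that default-sized-pool pattern (the worker _square and its inputs are hypothetical, not from the original):

import multiprocessing as mp

def _square(x):
    # stand-in worker; any picklable top-level function works
    return x * x

if __name__ == '__main__':
    # size the pool to the machine's CPU count, as Example #1 does by default
    with mp.Pool(mp.cpu_count()) as pool:
        print(pool.map(_square, range(8)))
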
Example #2
    print('\nILP solver...{}'.format(available_solvers[params.solver]))

    # only gurobi has prior support
    if params.prior and params.solver != 'gurobi':
        if 'gurobi' in available_solvers:
            params.solver = 'gurobi'
            print('WARNING: Prior analyses can only be performed using gurobi. '
                  'Switching to {}...'.format(available_solvers[params.solver]))
        else:
            raise Exception('\nPrior analyses can only be performed using gurobi solver.\n')

    # limit number of threads for parallelization
    try:
        ncpu = int(params.threads)
    except (TypeError, ValueError):
        from pathos.helpers import mp
        ncpu = mp.cpu_count()
    print('Threads...{}'.format(ncpu))

    # check if summary stats file already exists in current working directory
    statsfname = 'summary-stats_{}.txt'.format(inputfname)
    if statsfname in os.listdir(os.getcwd()):
        try:
            overwrite_statsfile = re.search(
                '(y|n)',
                input('\nWARNING: SUMMARY STATS FILE {} exists in current '
                      'working directory. Overwrite? (y/n): '.format(statsfname))
            ).group()
        except AttributeError:
            raise Exception('\nInvalid input.\n')
    else:
        overwrite_statsfile = 'y'

    # write header of summary stats output
    if overwrite_statsfile == 'y':
        with open(statsfname, 'w') as output:
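
The try/except above shows a common fallback: honor a user-supplied thread count and fall back to every available core when it is missing or malformed. A standard-library sketch of the same logic (resolve_threads is a hypothetical name; pathos.helpers.mp exposes the same cpu_count API as the stdlib module):

import multiprocessing as mp

def resolve_threads(requested):
    # return the requested thread count, or all CPUs if it is not a number
    try:
        return int(requested)
    except (TypeError, ValueError):
        return mp.cpu_count()

print(resolve_threads('4'))   # -> 4
print(resolve_threads(None))  # -> CPU count of this machine
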
Example #3
    def new(self):
        return tf.data.Dataset.from_generator(
            self._generator,
            output_types=('int8', 'int8', 'bool', 'float32', 'int32'),
            output_shapes=((None, *self.shapes[0]), (None, *self.shapes[0]),
                           (None, *self.shapes[1]), (None, *self.shapes[2]),
                           (None, *self.shapes[3])))


if __name__ == '__main__':
    GEN_ENDED_AT = int(input())
    GEN_ENDS_AT = int(input())

    mp.set_start_method('spawn')

    pool = ProcessPool(mp.cpu_count())

    critic = Critic([64, 64, 64, 64, 32, 32, 32, 32, 16, 16], NUM_ACT, STOCK_X)
    critic(critic.stock)

    if GEN_ENDED_AT >= 0:
        with open(f'ddrive/{GEN_ENDED_AT}.txt', 'rb') as f:
            weights = pickle.loads(lzma.decompress(base64.b85decode(f.read())))

        critic.set_weights(weights)

    critic.compile(optimizer=tf.keras.optimizers.SGD(0.0001), loss='mse')

    cg = CellGroup()

    for gen in range(GEN_ENDED_AT + 1, GEN_ENDS_AT + 1):
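
Example #3 calls mp.set_start_method('spawn') before creating its pool. 'spawn' starts each worker in a fresh interpreter rather than forking, so children do not inherit already-initialized TensorFlow/CUDA state. A minimal sketch of the required ordering (_work is hypothetical; the call must happen exactly once, under the main guard, before any pool exists):

import multiprocessing as mp

def _work(seed):
    # stand-in worker; real code would build its model inside the child
    return seed * 2

if __name__ == '__main__':
    mp.set_start_method('spawn')  # must run before the first pool is created
    with mp.Pool(mp.cpu_count()) as pool:
        print(pool.map(_work, range(4)))
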
Example #4
    def init_data(self, data_name, n_chunk=1024):
        print(f'Initializing {data_name} data...')

        def transform_triple_to_hrt(triple_idx):
            """ Transforms triple-idx (as a whole) to h/r/t format """
            if triple_idx == -1:  # for response_triple
                return NAF_TRIPLE
            triple = self.idx2triple[triple_idx]
            h, r, t = triple.split(', ')
            return [self.word2idx[h], self.rel2idx[r], self.word2idx[t]]

        def process_file(root, inp):
            start_i, filename = inp
            n_sample = line_count(filename)

            post = np.zeros((n_sample, self.args.max_sentence_len),
                            dtype=np.int32)
            post_length = np.zeros(
                (n_sample), dtype=np.int32)  # valid length (without pad)
            response = np.zeros((n_sample, self.args.max_sentence_len),
                                dtype=np.int32)
            response_length = np.zeros((n_sample), dtype=np.int32)
            # post_triple = np.zeros((n_sample, self.args.max_sentence_len), dtype=np.int32)
            triple = np.zeros((n_sample, self.args.max_sentence_len,
                               self.args.max_triple_len, 3),
                              dtype=np.int32)
            entity = np.zeros((n_sample, self.args.max_sentence_len,
                               self.args.max_triple_len),
                              dtype=np.int32)
            response_triple = np.zeros(
                (n_sample, self.args.max_sentence_len, 3), dtype=np.int32)

            max_post_len, max_response_len, max_triple_len = 0, 0, 0

            with jsonlines.open(filename) as df:
                for i, line in enumerate(df):

                    pl, rl = len(line['post']) + 2, len(line['response']) + 2
                    post_length[i] = pl
                    response_length[i] = rl

                    max_post_len = max(pl, max_post_len)
                    max_response_len = max(rl, max_response_len)
                    max_triple_len = max([len(ts)
                                          for ts in line['all_triples']] +
                                         [max_triple_len])

                    # map each token's 1-based triple pointer to its triple
                    # list, or [-1] (NAF) where the token has no triple
                    all_triples = [
                        line['all_triples'][pt - 1] if pt > 0 else [-1]
                        for pt in line['post_triples']
                    ]

                    post[i, :pl] = [SOS_IDX] + [
                        self.get_word_idx(p) for p in line['post']
                    ] + [EOS_IDX]
                    response[i, :rl] = [SOS_IDX] + [
                        self.get_word_idx(r) for r in line['response']
                    ] + [EOS_IDX]
                    # post_triple[i, 1:pl-1] = np.array(line['post_triples']) # [0, 0, 1, 0, 2...]
                    response_triple[i, :rl] = [NAF_TRIPLE] + [
                        transform_triple_to_hrt(rt)
                        for rt in line['response_triples']
                    ] + [NAF_TRIPLE]

                    # put NAF_TRIPLE/entity at index 0
                    triple[i] = pad_2d(
                        [[NAF_TRIPLE]] +
                        [[transform_triple_to_hrt(t) for t in triples]
                         for triples in all_triples] + [[NAF_TRIPLE]],
                        length=(self.args.max_sentence_len,
                                self.args.max_triple_len, 3))
                    entity[i] = pad_2d(
                        [[NAF_IDX]] +
                        [[self.entidx2wordidx[e] for e in entities]
                         for entities in line['all_entities']] + [[NAF_IDX]],
                        length=(self.args.max_sentence_len,
                                self.args.max_triple_len))

                # dump to zarr
                root['post'][start_i:start_i + n_sample] = post
                root['post_length'][start_i:start_i + n_sample] = post_length
                root['response'][start_i:start_i + n_sample] = response
                root['response_length'][start_i:start_i +
                                        n_sample] = response_length
                # root['post_triple'][start_i : start_i+n_sample] = post_triple
                root['triple'][start_i:start_i + n_sample] = triple
                root['entity'][start_i:start_i + n_sample] = entity
                root['response_triple'][start_i:start_i +
                                        n_sample] = response_triple

            return max_post_len, max_response_len, max_triple_len

        toread = [
            f'{self.data_path}/{data_name}set_pieces/{piece}'
            for piece in os.listdir(f'{self.data_path}/{data_name}set_pieces')
        ]
        n_lines = sum([line_count(piece) for piece in toread])
        init_n_lines = math.ceil(
            n_lines /
            n_chunk) * n_chunk  # avoid the error raised when the last piece is smaller than the specified chunk size

        root = zarr.open(f'{self.data_path}/{data_name}set_new.zarr', mode='w')
        post = root.zeros('post',
                          shape=(init_n_lines, self.args.max_sentence_len),
                          chunks=(n_chunk, None),
                          dtype='i4')
        post_length = root.zeros('post_length',
                                 shape=(init_n_lines, ),
                                 chunks=(n_chunk, ),
                                 dtype='i4')  # valid length (without pad)
        response = root.zeros('response',
                              shape=(init_n_lines, self.args.max_sentence_len),
                              chunks=(n_chunk, None),
                              dtype='i4')
        response_length = root.zeros('response_length',
                                     shape=(init_n_lines, ),
                                     chunks=(n_chunk, ),
                                     dtype='i4')
        post_triple = root.zeros('post_triple',
                                 shape=(init_n_lines,
                                        self.args.max_sentence_len),
                                 chunks=(n_chunk, None),
                                 dtype='i4')
        triple = root.zeros('triple',
                            shape=(init_n_lines, self.args.max_sentence_len,
                                   self.args.max_triple_len, 3),
                            chunks=(n_chunk, None, None, None),
                            dtype='i4')
        entity = root.zeros('entity',
                            shape=(init_n_lines, self.args.max_sentence_len,
                                   self.args.max_triple_len),
                            chunks=(n_chunk, None, None),
                            dtype='i4')
        response_triple = root.zeros('response_triple',
                                     shape=(init_n_lines,
                                            self.args.max_sentence_len, 3),
                                     chunks=(n_chunk, None, None),
                                     dtype='i4')

        pool = Pool(min(len(toread), mp.cpu_count()))
        func = functools.partial(process_file, root)
        iterinp = [(i * self.args.data_piece_size, filename)
                   for i, filename in enumerate(toread)]
        max_post_lens, max_response_lens, max_triple_lens = zip(
            *tqdm(pool.imap(func, iterinp), total=len(iterinp)))

        max_post_len, max_response_len, max_triple_len = max(
            max_post_lens), max(max_response_lens), max(max_triple_lens)

        # trim remaining space
        post.resize(n_lines, max_post_len)
        post_length.resize(n_lines)
        response.resize(n_lines, max_response_len)
        response_length.resize(n_lines)
        post_triple.resize(n_lines, max_post_len)
        triple.resize(n_lines, max_post_len, max_triple_len, 3)
        entity.resize(n_lines, max_post_len, max_triple_len)
        response_triple.resize(n_lines, max_response_len, 3)

        print(
            f'Dumped {data_name} at: {self.data_path}/{data_name}set_new.zarr')
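
The fan-out in Example #4 combines a few small patterns: a pool capped at min(len(toread), mp.cpu_count()) so no worker sits idle, functools.partial to bind the shared zarr root, imap wrapped in tqdm for progress, and zip(*...) to split the per-file (max_post_len, max_response_len, max_triple_len) tuples into columns. A standard-library sketch of the same shape (process_file above is a closure, which the stdlib Pool cannot pickle, so its Pool is presumably the pathos/multiprocess variant; the hypothetical worker here is top-level instead):

import functools
import multiprocessing as mp

def _process(root, inp):
    # mirrors process_file's (root, (start_i, filename)) calling convention
    start_i, filename = inp
    return len(filename), start_i  # stand-in per-file maxima

if __name__ == '__main__':
    files = ['a.jsonl', 'bb.jsonl', 'ccc.jsonl']  # hypothetical inputs
    with mp.Pool(min(len(files), mp.cpu_count())) as pool:
        func = functools.partial(_process, 'root')  # bind the shared first argument
        cols = zip(*pool.imap(func, enumerate(files)))  # lazy, order-preserving
        maxima = [max(col) for col in cols]
    print(maxima)

The over-allocate-then-resize dance around it (init_n_lines rounded up to a multiple of n_chunk, then every array trimmed back to n_lines) exists, per the original comment, to avoid the error hit when the last piece is smaller than the chunk size.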