def __init__(self, image_repository, path_columns, savefile, registrator=None,
             n_processes=mp.cpu_count(), debug=False):
    self.debug = debug
    print('initializing analysis...',
          '\timage repository:\t{}'.format(image_repository),
          '\tpath columns:\t{}'.format(path_columns),
          '\tsavefile:\t{}'.format(savefile),
          '\tprocesses:\t{}'.format(n_processes),
          '\tmeasurements:\t{}'.format(list(MEASUREMENTS.keys())),
          '\tdenoising methods:\t{}'.format(list(METHODS.keys())),
          sep='\n')

    self.methods = METHODS.copy()
    self.measurements = MEASUREMENTS.copy()
    self.savefile = savefile
    self.image_repository = image_repository
    self.path_columns = path_columns
    self.pool = mp.Pool(n_processes)
    # reuse a caller-supplied Registrator, otherwise build one mirroring the debug setting
    self.registrator = registrator if isinstance(registrator, Registrator) else Registrator(verbose=debug, graphic=debug)
    self.denoising = None

    # make save dir if it does not exist
    save_path = os.path.dirname(self.savefile)
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    print('done!\n')
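# Hedged sketch (not part of the original class; ensure_save_dir is a hypothetical
# helper name): the same "make save dir if it does not exist" step written with
# os.makedirs(..., exist_ok=True), which also tolerates another process creating the
# directory first. The truthiness guard covers savefile paths with no directory
# component, where os.makedirs('') would raise.
import os

def ensure_save_dir(savefile):
    save_path = os.path.dirname(savefile)
    if save_path:
        os.makedirs(save_path, exist_ok=True)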
print('\nILP solver...{}'.format(available_solvers[params.solver]))

# only gurobi has prior support
if params.prior and params.solver != 'gurobi':
    if 'gurobi' in available_solvers:
        params.solver = 'gurobi'
        print('WARNING: Prior analyses can only be performed using gurobi. Switching to {}...'.format(available_solvers[params.solver]))
    else:
        raise Exception('\nPrior analyses can only be performed using gurobi solver.\n')

# limit number of threads for parallelization
try:
    ncpu = int(params.threads)
except:
    from pathos.helpers import mp
    ncpu = mp.cpu_count()
print('Threads...{}'.format(ncpu))

# check if a summary stats file already exists in the current working directory
statsfname = 'summary-stats_{}.txt'.format(inputfname)
if statsfname in os.listdir(os.getcwd()):
    try:
        overwrite_statsfile = re.search('(y|n)', raw_input('\nWARNING: SUMMARY STATS FILE {} exists in current working directory. Overwrite? (y/n): '.format(statsfname))).group()
    except:
        raise Exception('\nInvalid input.\n')
else:
    overwrite_statsfile = 'y'

# write header of summary stats output
if overwrite_statsfile == 'y':
    with open(statsfname, 'w') as output:
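# Hedged sketch (assumption, not the project's code; ask_overwrite is a hypothetical
# helper name): the prompt above uses raw_input(), i.e. Python 2. The equivalent
# below also runs under Python 3, where raw_input() was renamed to input(), and keeps
# the original behaviour of accepting the first 'y' or 'n' in the reply.
import re
import sys

def ask_overwrite(statsfname):
    prompt = ('\nWARNING: SUMMARY STATS FILE {} exists in current working directory. '
              'Overwrite? (y/n): '.format(statsfname))
    read_line = raw_input if sys.version_info[0] < 3 else input
    match = re.search('(y|n)', read_line(prompt))
    if match is None:
        raise Exception('\nInvalid input.\n')
    return match.group()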
def new(self):
    # wrap the sample generator in a tf.data pipeline; the leading None leaves the
    # batch dimension variable
    return tf.data.Dataset.from_generator(
        self._generator,
        output_types=('int8', 'int8', 'bool', 'float32', 'int32'),
        output_shapes=((None, *self.shapes[0]),
                       (None, *self.shapes[0]),
                       (None, *self.shapes[1]),
                       (None, *self.shapes[2]),
                       (None, *self.shapes[3])))


if __name__ == '__main__':
    GEN_ENDED_AT = int(input())
    GEN_ENDS_AT = int(input())

    mp.set_start_method('spawn')
    pool = ProcessPool(mp.cpu_count())

    critic = Critic([64, 64, 64, 64, 32, 32, 32, 32, 16, 16], NUM_ACT, STOCK_X)
    critic(critic.stock)  # one forward pass so the weights exist before loading
    if GEN_ENDED_AT >= 0:
        # resume: saved weights are base85-encoded, lzma-compressed pickles
        with open(f'ddrive/{GEN_ENDED_AT}.txt', 'rb') as f:
            weights = pickle.loads(lzma.decompress(base64.b85decode(f.read())))
        critic.set_weights(weights)
    critic.compile(optimizer=tf.keras.optimizers.SGD(0.0001), loss='mse')

    cg = CellGroup()
    for gen in range(GEN_ENDED_AT + 1, GEN_ENDS_AT + 1):
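# Hedged usage sketch (assumptions: `replay_buffer` is any object exposing the new()
# method above, with self._generator and self.shapes defined elsewhere in that class).
# from_generator builds the dataset lazily, so prefetch() overlaps sample generation
# with the consumer; newer TF releases prefer output_signature=tf.TensorSpec(...)
# over the deprecated output_types/output_shapes pair.
import tensorflow as tf

def peek_one_batch(replay_buffer):
    dataset = replay_buffer.new().prefetch(tf.data.AUTOTUNE)
    for batch in dataset.take(1):
        print([tensor.shape for tensor in batch])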
def init_data(self, data_name, n_chunk=1024):
    print(f'Initializing {data_name} data...')

    def transform_triple_to_hrt(triple_idx):
        """Transforms triple-idx (as a whole) to h/r/t format."""
        if triple_idx == -1:  # for response_triple
            return NAF_TRIPLE
        triple = self.idx2triple[triple_idx]
        h, r, t = triple.split(', ')
        return [self.word2idx[h], self.rel2idx[r], self.word2idx[t]]

    def process_file(root, inp):
        start_i, filename = inp
        n_sample = line_count(filename)

        post = np.zeros((n_sample, self.args.max_sentence_len), dtype=np.int32)
        post_length = np.zeros((n_sample,), dtype=np.int32)  # valid length (without pad)
        response = np.zeros((n_sample, self.args.max_sentence_len), dtype=np.int32)
        response_length = np.zeros((n_sample,), dtype=np.int32)
        # post_triple = np.zeros((n_sample, self.args.max_sentence_len), dtype=np.int32)
        triple = np.zeros((n_sample, self.args.max_sentence_len, self.args.max_triple_len, 3), dtype=np.int32)
        entity = np.zeros((n_sample, self.args.max_sentence_len, self.args.max_triple_len), dtype=np.int32)
        response_triple = np.zeros((n_sample, self.args.max_sentence_len, 3), dtype=np.int32)

        max_post_len, max_response_len, max_triple_len = 0, 0, 0

        with jsonlines.open(filename) as df:
            for i, line in enumerate(df):
                pl, rl = len(line['post']) + 2, len(line['response']) + 2  # +2 for SOS/EOS
                post_length[i] = pl
                response_length[i] = rl
                max_post_len = max(pl, max_post_len)
                max_response_len = max(rl, max_response_len)
                max_triple_len = max([len(l) for l in line['all_triples']] + [max_triple_len])

                # post_triples holds, per post token, a 1-based index into all_triples (0 = no triple)
                all_triples = [line['all_triples'][i - 1] if i > 0 else [-1] for i in line['post_triples']]

                post[i, :pl] = [SOS_IDX] + [self.get_word_idx(p) for p in line['post']] + [EOS_IDX]
                response[i, :rl] = [SOS_IDX] + [self.get_word_idx(r) for r in line['response']] + [EOS_IDX]
                # post_triple[i, 1:pl-1] = np.array(line['post_triples'])  # [0, 0, 1, 0, 2...]
                response_triple[i, :rl] = [NAF_TRIPLE] + [transform_triple_to_hrt(rt) for rt in line['response_triples']] + [NAF_TRIPLE]

                # put NAF_TRIPLE/entity at index 0
                triple[i] = pad_2d(
                    [[NAF_TRIPLE]] + [[transform_triple_to_hrt(t) for t in triples] for triples in all_triples] + [[NAF_TRIPLE]],
                    length=(self.args.max_sentence_len, self.args.max_triple_len, 3))
                entity[i] = pad_2d(
                    [[NAF_IDX]] + [[self.entidx2wordidx[e] for e in entities] for entities in line['all_entities']] + [[NAF_IDX]],
                    length=(self.args.max_sentence_len, self.args.max_triple_len))

        # dump to zarr
        root['post'][start_i:start_i + n_sample] = post
        root['post_length'][start_i:start_i + n_sample] = post_length
        root['response'][start_i:start_i + n_sample] = response
        root['response_length'][start_i:start_i + n_sample] = response_length
        # root['post_triple'][start_i:start_i + n_sample] = post_triple
        root['triple'][start_i:start_i + n_sample] = triple
        root['entity'][start_i:start_i + n_sample] = entity
        root['response_triple'][start_i:start_i + n_sample] = response_triple

        return max_post_len, max_response_len, max_triple_len

    toread = [f'{self.data_path}/{data_name}set_pieces/{piece}'
              for piece in os.listdir(f'{self.data_path}/{data_name}set_pieces')]
    n_lines = sum([line_count(piece) for piece in toread])
    # pad the row count up to a multiple of n_chunk: prevents an error when the last
    # piece is smaller than the specified chunk size
    init_n_lines = math.ceil(n_lines / n_chunk) * n_chunk

    root = zarr.open(f'{self.data_path}/{data_name}set_new.zarr', mode='w')
    post = root.zeros('post', shape=(init_n_lines, self.args.max_sentence_len), chunks=(n_chunk, None), dtype='i4')
    post_length = root.zeros('post_length', shape=(init_n_lines,), chunks=(n_chunk,), dtype='i4')  # valid length (without pad)
    response = root.zeros('response', shape=(init_n_lines, self.args.max_sentence_len), chunks=(n_chunk, None), dtype='i4')
    response_length = root.zeros('response_length', shape=(init_n_lines,), chunks=(n_chunk,), dtype='i4')
    post_triple = root.zeros('post_triple', shape=(init_n_lines, self.args.max_sentence_len), chunks=(n_chunk, None), dtype='i4')
    triple = root.zeros('triple', shape=(init_n_lines, self.args.max_sentence_len, self.args.max_triple_len, 3), chunks=(n_chunk, None, None, None), dtype='i4')
    entity = root.zeros('entity', shape=(init_n_lines, self.args.max_sentence_len, self.args.max_triple_len), chunks=(n_chunk, None, None), dtype='i4')
    response_triple = root.zeros('response_triple', shape=(init_n_lines, self.args.max_sentence_len, 3), chunks=(n_chunk, None, None), dtype='i4')

    # each piece is written at offset i * data_piece_size, one worker per piece
    pool = Pool(min(len(toread), mp.cpu_count()))
    func = functools.partial(process_file, root)
    iterinp = [(i * self.args.data_piece_size, filename) for i, filename in enumerate(toread)]
    max_post_lens, max_response_lens, max_triple_lens = zip(*tqdm(pool.imap(func, iterinp), total=len(iterinp)))
    max_post_len, max_response_len, max_triple_len = max(max_post_lens), max(max_response_lens), max(max_triple_lens)

    # trim remaining space
    post.resize(n_lines, max_post_len)
    post_length.resize(n_lines)
    response.resize(n_lines, max_response_len)
    response_length.resize(n_lines)
    post_triple.resize(n_lines, max_post_len)
    triple.resize(n_lines, max_post_len, max_triple_len, 3)
    entity.resize(n_lines, max_post_len, max_triple_len)
    response_triple.resize(n_lines, max_response_len, 3)

    print(f'Dumped {data_name} at: {self.data_path}/{data_name}set_new.zarr')
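# Hedged read-back sketch (assumption: the same data_path / data_name values used
# above; load_post_batch is a hypothetical helper name). zarr.open in read mode
# returns the group written here, and slicing an array pulls only the touched
# chunks into memory.
import zarr

def load_post_batch(data_path, data_name, start, stop):
    root = zarr.open(f'{data_path}/{data_name}set_new.zarr', mode='r')
    return root['post'][start:stop], root['post_length'][start:stop]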