def preprocess_mri(input_files, output_file):
    """Read monochrome MRI images, build primal/spectrum arrays, pickle them.

    Saves a tuple ``(img_primal, img_spectrum)`` to ``output_file``:
    uint8 images plus the fft-shifted complex64 spectrum of each cropped image.
    """
    filenames = sorted(input_files)
    n = len(filenames)
    print('Input images: %d' % n)
    assert n > 0

    resolution = np.asarray(PIL.Image.open(filenames[0]), dtype=np.uint8).shape
    assert len(resolution) == 2  # Expect monochromatic images
    print('Image resolution: %s' % str(resolution))

    # Largest odd size <= each dimension ((r - 1) | 1 forces the low bit on).
    crop_size = tuple((dim - 1) | 1 for dim in resolution)
    crop_slice = np.s_[:crop_size[0], :crop_size[1]]
    print('Crop size: %s' % str(crop_size))

    img_primal = np.zeros((n, ) + resolution, dtype=np.uint8)
    img_spectrum = np.zeros((n, ) + crop_size, dtype=np.complex64)

    print('Processing input files..')
    for idx, path in enumerate(filenames):
        if idx % 100 == 0:
            print('%d / %d ..' % (idx, n))
        image = np.asarray(PIL.Image.open(path), dtype=np.uint8)
        img_primal[idx] = image

        # Normalize to [-0.5, 0.5], crop to the odd size, FFT, and shift.
        normalized = image.astype(np.float32) / 255.0 - 0.5
        normalized = normalized[crop_slice]
        spectrum = np.fft.fft2(normalized).astype(np.complex64)
        img_spectrum[idx] = fftshift2d(spectrum)

    print('Saving: %s' % output_file)
    util.save_pkl((img_primal, img_spectrum), output_file)
def generate_tidy_data_file(raw_data: RawData, task: int, mode: int):
    """Generate tidy data file.

    Args:
        raw_data (RawData): Raw data.
        task (int): A single task.
        mode (int): A single mode.

    """
    # If the item file already exists, warn and skip regeneration.
    item_file_name: str = DatasetConfig.get_dialog_filename(task, mode)
    if isfile(item_file_name):
        print('Warning: Tidy data file {} exists.'.format(item_file_name))
        return

    # Get raw data dialogs according to its mode.
    dialogs: Optional[List[Dialog]] = None
    if mode == TRAIN_MODE:
        dialogs = raw_data.train_dialogs
    if mode == VALID_MODE:
        dialogs = raw_data.valid_dialogs
    if mode == TEST_MODE:
        dialogs = raw_data.test_dialogs
    assert dialogs is not None

    tidy_dialogs: List[TidyDialog] = []
    for item_idx, dialog in enumerate(dialogs):
        print('Getting items from dialogs {}/{}'.format(
            item_idx + 1, len(dialogs)))

        # Get items according to different TASKS.
        if task == INTENTION_TASK:
            # Standardize dialog first.
            std_dialog: Dialog = standardized_dialog(dialog)
            tidy_dialogs.extend(get_intention_task_items(std_dialog))
        elif task == TEXT_TASK:
            tidy_dialogs.extend(get_text_task_items(dialog))
        elif task == RECOMMEND_TASK:
            tidy_dialogs.extend(
                get_recommend_task_items(raw_data.obj_id, dialog))
        elif task == KNOWLEDGE_TASK:
            tidy_dialogs.extend(get_knowledge_items(dialog, KNOWLEDGE_TASK))

    # Save as pickle file.
    save_pkl(tidy_dialogs, 'tidy_dialogs', item_file_name)
def __init__(self):
    """Load knowledge data: from the cached pickle if it exists,
    otherwise from the raw data files (then cache the result)."""
    self.styletips_data: StyleTipsData = None
    self.celebrity_data: CelebrityData = None
    self.attribute_data: AttributeData = None

    if isfile(DatasetConfig.knowledge_data_file):
        # Read existed extracted data files.
        cached = load_pkl(DatasetConfig.knowledge_data_file)
        self.styletips_data = cached.styletips_data
        self.celebrity_data = cached.celebrity_data
        self.attribute_data = cached.attribute_data
        return

    # Load data from raw data file and save them into pkl.
    self.styletips_data = StyleTipsData.from_file()
    self.celebrity_data = CelebrityData.from_file()
    self.attribute_data = AttributeData.from_file()
    save_pkl(self, 'KnowledgeData', DatasetConfig.knowledge_data_file)
def generate_text(self, sess, start_text='eos', n_words=50):
    """Sample ``n_words`` of text from the model.

    A seed document is drawn from the training data iterator to compute the
    topic distribution theta; generation then proceeds one word at a time,
    feeding back the previous token. Returns the generated words as a list.
    """
    print("generating some text...")
    # Pick a seed document...for example the 10th doc from the training data.
    iterator = self.reader.iterator()
    for i in range(1):
        # FIX: `.next()` is the Python 2 iterator API — on Python 3 it raises
        # AttributeError. The builtin next() works on both.
        X, Xc = next(iterator[0])
        Y, L, seq_len, n_batch = next(iterator[1])

    # Get theta for the seed text.
    seed_text = [self.reader.idx2word[word_idx]
                 for word_idx in list(np.reshape(X, [-1])) if word_idx != 0]
    print("ptb seed text: ", seed_text)
    util.save_pkl('seed_text_gru.pkl', seed_text)

    theta, theta_prop, state = sess.run(
        [self.theta, self.theta_prop, self.final_state],
        feed_dict={self._Xc: Xc, self._X: X, self._seq_len: seq_len})
    print("corresponding topic distribution for the seed text: {}".format(
        theta_prop))
    util.save_pkl('./theta_gru.pkl', theta_prop)

    tokens = [self.reader.vocab[word] for word in start_text.split()]
    # FIX: xrange does not exist on Python 3; range is equivalent here.
    for i in range(n_words):
        X = np.reshape(np.array([tokens[-1:]]), [1, 1])
        feed_dict = {self._X: X, self._seq_len: [1],
                     self.theta: theta, self.final_state: state}
        state, pred = sess.run([self.final_state, self.p_y_i],
                               feed_dict=feed_dict)
        # Sample the next word from the predicted distribution.
        next_word_idx = np.random.choice(np.arange(self.reader.vocab_size),
                                         replace=False,
                                         p=pred.reshape([-1]))
        tokens.append(next_word_idx)

    output = [self.reader.idx2word[word_idx] for word_idx in tokens]
    return output
def save(self):
    """Pickle this object to the path returned by ``self.get_fname()``."""
    target = self.get_fname()
    util.save_pkl(target, self)
def save(self):
    """Pickle this object to the path stored in ``self.fname``."""
    destination = self.fname
    util.save_pkl(destination, self)
def __init__(self, mode: int):
    """Load raw data for the splits selected by the ``mode`` bitmask,
    extracting and caching any pieces (common data, per-split dialogs)
    that are missing on disk.

    Note: For convenience, RawData loads common data (if exists) only if
    mode is NONE_MODE.
    """
    # Attributes.
    self.mode: int = mode
    self.dialog_vocab: Dict[str, int] = None
    self.glove: List[Optional[List[float]]] = None
    self.obj_id: Dict[str, int] = None

    # Dynamic attributes (only for the splits selected by `mode`).
    if self.mode & TRAIN_MODE:
        self.train_dialogs: List[Dialog] = None
    if self.mode & VALID_MODE:
        self.valid_dialogs: List[Dialog] = None
    if self.mode & TEST_MODE:
        self.test_dialogs: List[Dialog] = None

    # Check if consistency of data files.
    RawData.check_consistency(mode)

    # Read existed extracted data files.
    self.read_extracted_data()

    # If common data doesn't exist, then we need to get it.
    if not isfile(DatasetConfig.common_raw_data_file):
        common_data = RawData._get_common_data()
        self.dialog_vocab = common_data.dialog_vocab
        self.glove = common_data.glove
        self.obj_id = common_data.obj_id
        # Save common data to a .pkl file.
        save_pkl(common_data, 'common_data',
                 DatasetConfig.common_raw_data_file)

    # FIX: the three split branches below were copy-pasted (and their
    # comments all wrongly claimed to save "common data"); factored into
    # a single helper. Behavior is unchanged.
    def _ensure_split(split_mode: int, attr: str, data_file: str) -> None:
        # Extract and cache one split's dialogs if its .pkl is missing.
        if self.mode & split_mode and not isfile(data_file):
            dialogs = RawData._get_dialogs(split_mode, self.dialog_vocab,
                                           self.obj_id)
            setattr(self, attr, dialogs)
            # Save this split's dialogs to a .pkl file.
            save_pkl(dialogs, attr, data_file)

    _ensure_split(TRAIN_MODE, 'train_dialogs',
                  DatasetConfig.train_raw_data_file)
    _ensure_split(VALID_MODE, 'valid_dialogs',
                  DatasetConfig.valid_raw_data_file)
    _ensure_split(TEST_MODE, 'test_dialogs',
                  DatasetConfig.test_raw_data_file)