Example #1
# Assumed imports; `util` and `fftshift2d` are project-local helpers that are
# not shown in this excerpt.
import numpy as np
import PIL.Image


def preprocess_mri(input_files, output_file):
    # Sort for a deterministic, reproducible output archive.
    all_files = sorted(input_files)
    num_images = len(all_files)
    print('Input images: %d' % num_images)
    assert num_images > 0

    resolution = np.asarray(PIL.Image.open(all_files[0]), dtype=np.uint8).shape
    assert len(resolution) == 2  # Expect monochromatic images
    print('Image resolution: %s' % str(resolution))

    # Round each dimension down to the nearest odd number so the shifted
    # spectrum has a single centre bin.
    crop_size = tuple([((r - 1) | 1) for r in resolution])
    crop_slice = np.s_[:crop_size[0], :crop_size[1]]
    print('Crop size: %s' % str(crop_size))

    img_primal = np.zeros((num_images,) + resolution, dtype=np.uint8)
    img_spectrum = np.zeros((num_images,) + crop_size, dtype=np.complex64)

    print('Processing input files..')
    for i, fn in enumerate(all_files):
        if i % 100 == 0:
            print('%d / %d ..' % (i, num_images))
        img = np.asarray(PIL.Image.open(fn), dtype=np.uint8)
        img_primal[i] = img

        # Normalize to [-0.5, 0.5], crop to the odd size, and store the
        # centred 2-D spectrum.
        img = img.astype(np.float32) / 255.0 - 0.5
        img = img[crop_slice]
        spec = np.fft.fft2(img).astype(np.complex64)
        spec = fftshift2d(spec)
        img_spectrum[i] = spec

    print('Saving: %s' % output_file)
    util.save_pkl((img_primal, img_spectrum), output_file)
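
Example #1 depends on the project-local helpers `fftshift2d` and `util.save_pkl`, which are not shown above. As a purely hypothetical illustration of how it might be called (the glob pattern and output path are made up for this sketch):

import glob

# Hypothetical invocation: collect monochrome PNG slices and write the
# (image, spectrum) archive. Paths are illustrative, not from the source.
png_files = glob.glob('datasets/mri-png/*.png')
preprocess_mri(png_files, 'datasets/mri-preprocessed.pkl')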
Example #2
def generate_tidy_data_file(raw_data: RawData, task: int, mode: int):
    """Generate tidy data file.
    Args:
        raw_data (RawData): Raw data.
        task (int): A single task.
        mode (int): A single mode.
    """

    # If item file already exists, then return and print a warning
    item_file_name: str = DatasetConfig.get_dialog_filename(task, mode)
    if isfile(item_file_name):
        print('Warning: Tidy data file {} exists.'.format(item_file_name))
        return

    # Select the raw dialogs that correspond to the requested mode.
    dialogs: Optional[List[Dialog]] = None
    if mode == TRAIN_MODE:
        dialogs = raw_data.train_dialogs
    elif mode == VALID_MODE:
        dialogs = raw_data.valid_dialogs
    elif mode == TEST_MODE:
        dialogs = raw_data.test_dialogs
    assert dialogs is not None

    #if task & KNOWLEDGE_TASK:
    #    ordinal_number = {raw_data.dialog_vocab[key]: value for key, value in
    #                      DatasetConfig.ordinal_number.items()}

    tidy_dialogs: List[TidyDialog] = []
    for item_idx, dialog in enumerate(dialogs):
        print('Getting items from dialog {}/{}'.format(
            item_idx + 1, len(dialogs)))

        # Get items according to different TASKS.
        if task == INTENTION_TASK:
            # Standardize dialog first.
            std_dialog: Dialog = standardized_dialog(dialog)
            tidy_dialogs.extend(get_intention_task_items(std_dialog))
        elif task == TEXT_TASK:
            tidy_dialogs.extend(get_text_task_items(dialog))
        elif task == RECOMMEND_TASK:
            tidy_dialogs.extend(
                get_recommend_task_items(raw_data.obj_id, dialog))
        elif task == KNOWLEDGE_TASK:
            items = get_knowledge_items(
                dialog,  #ordinal_number,
                KNOWLEDGE_TASK)
            tidy_dialogs.extend(items)

    # Save as pickle file.
    #print('Not saving for now')
    save_pkl(tidy_dialogs, 'tidy_dialogs', item_file_name)
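
The three-argument `save_pkl(obj, name, filename)` used here appears to be a project-local helper. A minimal sketch of such a helper, assuming it simply logs a human-readable label and pickles the object (the real project code may differ), might be:

import pickle

def save_pkl(obj, name: str, filename: str) -> None:
    # Sketch only: log a label, then pickle the object.
    # The real helper may add directory creation, a fixed protocol, etc.
    print('Saving {} to {}'.format(name, filename))
    with open(filename, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)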
Example #3
    def __init__(self):
        self.styletips_data: StyleTipsData = None
        self.celebrity_data: CelebrityData = None
        self.attribute_data: AttributeData = None

        if isfile(DatasetConfig.knowledge_data_file):
            # Read the previously extracted knowledge data file.
            knowledge_data = load_pkl(DatasetConfig.knowledge_data_file)
            self.styletips_data = knowledge_data.styletips_data
            self.celebrity_data = knowledge_data.celebrity_data
            self.attribute_data = knowledge_data.attribute_data
        else:
            # Load data from the raw data files and save it into a .pkl file.
            self.styletips_data = StyleTipsData.from_file()
            self.celebrity_data = CelebrityData.from_file()
            self.attribute_data = AttributeData.from_file()
            save_pkl(self, 'KnowledgeData', DatasetConfig.knowledge_data_file)
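
This snippet also relies on a `load_pkl` counterpart. Assuming the same plain-pickle convention as the `save_pkl` sketch above, it could be as simple as:

import pickle

def load_pkl(filename: str):
    # Counterpart sketch to save_pkl above; assumes plain pickle files.
    with open(filename, 'rb') as f:
        return pickle.load(f)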
Example #4
  def generate_text(self, sess, start_text='eos', n_words=50):
    print("generating some text...")
    # Pick a seed document (here: the first batch from the training data).
    iterator = self.reader.iterator()
    for i in range(1):
      X, Xc = next(iterator[0])
      Y, L, seq_len, n_batch = next(iterator[1])
    # Recover the seed text and, below, its topic distribution (theta).
    seed_text = [self.reader.idx2word[word_idx]
                 for word_idx in list(np.reshape(X, [-1])) if word_idx != 0]
    print("ptb seed text: ", seed_text)
    util.save_pkl('seed_text_gru.pkl', seed_text)
    theta, theta_prop, state = sess.run([self.theta, self.theta_prop, self.final_state], 
                                   feed_dict={self._Xc: Xc, 
                                              self._X: X,
                                              self._seq_len: seq_len})
    print("corresponding topic distribution for the seed text: {}".format(theta_prop))
    util.save_pkl('./theta_gru.pkl', theta_prop)
    tokens = [self.reader.vocab[word] for word in start_text.split()]

    for i in range(n_words):
      X = np.reshape(np.array([tokens[-1:]]), [1, 1])
      feed_dict = {self._X: X,
                   self._seq_len: [1],
                   self.theta: theta,
                   self.final_state: state}
      state, pred = sess.run(
          [self.final_state, self.p_y_i], feed_dict=feed_dict)

      next_word_idx = np.random.choice(
          np.arange(self.reader.vocab_size),
          replace=False, p=pred.reshape([-1]))
      tokens.append(next_word_idx)

    output = [self.reader.idx2word[word_idx] for word_idx in tokens]

    return output
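
As a hypothetical usage note (the names below are assumptions, not from the source): the method runs inside an existing TensorFlow 1.x session and returns a list of words, so a caller might simply join them.

# Hypothetical call; `model` is assumed to expose generate_text() as above.
words = model.generate_text(sess, start_text='eos', n_words=50)
print(' '.join(words))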
Example #5
    def save(self):
        util.save_pkl(self.get_fname(), self)
Example #6
    def save(self):
        util.save_pkl(self.fname, self)
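
Examples #4, #5 and #6 call `util.save_pkl` with a (path, object) argument order, unlike the three-argument helper assumed for Example #2. A sketch of that variant, under the same plain-pickle assumption, might be:

import pickle

def save_pkl(path, obj) -> None:
    # Sketch of the two-argument (path, object) variant seen in Examples #4-#6;
    # in those projects it would live in their local `util` module.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)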
Example #7
    def __init__(self, mode: int):
        # Note: for convenience, RawData loads common data (if it exists)
        # only when mode is NONE_MODE.

        # Attributes.
        self.mode: int = mode

        self.dialog_vocab: Dict[str, int] = None
        self.glove: List[Optional[List[float]]] = None
        self.obj_id: Dict[str, int] = None
        #self.obj_paths: List[str] = None

        # Dynamic attributes.
        if self.mode & TRAIN_MODE:
            self.train_dialogs: List[Dialog] = None
        if self.mode & VALID_MODE:
            self.valid_dialogs: List[Dialog] = None
        if self.mode & TEST_MODE:
            self.test_dialogs: List[Dialog] = None

        # Check the consistency of the data files.
        RawData.check_consistency(mode)

        # Read any previously extracted data files.
        self.read_extracted_data()

        # If common data doesn't exist, then we need to get it.
        if not isfile(DatasetConfig.common_raw_data_file):
            common_data = RawData._get_common_data()
            self.dialog_vocab: Dict[str, int] = common_data.dialog_vocab
            self.glove: List[Optional[List[float]]] = common_data.glove
            self.obj_id: Dict[str, int] = common_data.obj_id
            #self.obj_paths: List[str] = common_data.obj_paths

            # Save common data to a .pkl file.
            save_pkl(common_data, 'common_data',
                     DatasetConfig.common_raw_data_file)

        # If mode-specific data doesn't exist, then we need to get it.
        if self.mode & TRAIN_MODE:
            has_data_pkl = isfile(DatasetConfig.train_raw_data_file)

            if not has_data_pkl:
                self.train_dialogs = RawData._get_dialogs(TRAIN_MODE,
                                                          self.dialog_vocab,
                                                          self.obj_id)
                # Save the train dialogs to a .pkl file.
                save_pkl(self.train_dialogs, 'train_dialogs',
                         DatasetConfig.train_raw_data_file)

        if self.mode & VALID_MODE:
            has_data_pkl = isfile(DatasetConfig.valid_raw_data_file)

            if not has_data_pkl:
                self.valid_dialogs = RawData._get_dialogs(VALID_MODE,
                                                          self.dialog_vocab,
                                                          self.obj_id)
                # Save the valid dialogs to a .pkl file.
                save_pkl(self.valid_dialogs, 'valid_dialogs',
                         DatasetConfig.valid_raw_data_file)

        if self.mode & TEST_MODE:
            has_data_pkl = isfile(DatasetConfig.test_raw_data_file)

            if not has_data_pkl:
                self.test_dialogs = RawData._get_dialogs(TEST_MODE,
                                                         self.dialog_vocab,
                                                         self.obj_id)
                # Save the test dialogs to a .pkl file.
                save_pkl(self.test_dialogs, 'test_dialogs',
                         DatasetConfig.test_raw_data_file)
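
The `&` tests in `__init__` show that `mode` is treated as a bit mask, so modes can be combined. A hypothetical end-to-end use of Examples #2 and #7 together (the constants come from the same project; the particular combination is illustrative) might be:

# Hypothetical usage: build the raw data for train + valid, then emit the
# tidy file for one task/mode combination (see Example #2).
raw_data = RawData(TRAIN_MODE | VALID_MODE)
generate_tidy_data_file(raw_data, INTENTION_TASK, TRAIN_MODE)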