def main(data_file):
    """extract function word features from a text file"""
    # TODO: parse the review file. Field [0] per line is the review ID. Field [-1] is the review
    # define this function in util.py
    reviews, ids = load_reviews(data_file)

    # debug using just a few
    reviews = reviews[:10]
    ids = ids[:10]

    feature_key = ["the", "or", "and"]
    print(f"loading feature vectors for {len(reviews)} reviews")

    # TODO: For function words "the", "or" and "and", use a Python list to
    # make a count vector per review
    feature_lists = []

    # TODO: Create the same feature vectors as a numpy array
    feature_np = np.zeros((len(reviews), len(feature_key)), dtype=int)

    # TODO: Cast your feature_lists to a numpy array and then verify it is equivalent to feature_np

    # TODO: Shuffle the list of id's and the feature matrix in unison. Then check your work
    print("Shuffling data")
    # TODO: define this function in util.py
    shuffled_feature_matrix, shuffled_ids = shuffle_dataset(feature_np, ids)
    print("ids before shuffle")
    print(ids)
    print("ids after shuffle")
    print(shuffled_ids)
import argparse

import numpy as np

# helpers the TODOs below ask you to define in util.py
from util import load_reviews, shuffle_dataset


def main(data_file):
    """extract function word features from a text file"""
    # TODO: parse the review file. Field [0] per line is the review ID. Field [-1] is the review
    # define this function in util.py
    reviews, ids = load_reviews(data_file)

    # debug using just a few
    reviews = reviews[:10]
    ids = ids[:10]

    feature_key = ["the", "or", "and"]
    print(f"loading feature vectors for {len(reviews)} reviews")

    # TODO: For function words "the", "or" and "and", use a Python list to
    # make a count vector per review
    feature_lists = []
    for review in reviews:
        pass  # TODO: build a count vector for this review and append it to feature_lists

    # TODO: Create the same feature vectors as a numpy array
    feature_np = np.zeros((len(reviews), len(feature_key)), dtype=int)

    # TODO: Cast your feature_lists to a numpy array and then verify it is equivalent to feature_np

    # TODO: Shuffle the list of id's and the feature matrix in unison. Then check your work
    print("Shuffling data")
    # TODO: define this function in util.py
    shuffled_feature_matrix, shuffled_ids = shuffle_dataset(feature_np, ids)
    print("ids before shuffle")
    print(ids)
    print("ids after shuffle")
    print(shuffled_ids)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='feature vector lab')
    parser.add_argument('--path', type=str, default="imdb_practice.txt",
                        help='path to input with one review per line')
    args = parser.parse_args()
    main(args.path)
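# The skeletons above import load_reviews and shuffle_dataset from util.py,
# which the TODOs ask you to write. A minimal sketch of that module follows;
# the tab delimiter and the exact return types are assumptions, not taken
# from the lab itself.
import numpy as np


def load_reviews(data_file):
    """Parse one review per line into (reviews, ids); field [0] is the ID,
    field [-1] is the review text (delimiter assumed to be a tab)."""
    reviews, ids = [], []
    with open(data_file, encoding="utf-8") as f:
        for line in f:
            fields = line.rstrip("\n").split("\t")  # assumed tab-separated
            ids.append(fields[0])
            reviews.append(fields[-1])
    return reviews, ids


def shuffle_dataset(feature_matrix, ids):
    """Shuffle a feature matrix and its id list in unison by applying the
    same random permutation to both."""
    order = np.random.permutation(len(ids))
    return feature_matrix[order], [ids[i] for i in order]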
def get_sample(self, train=True):
    if train:
        folders = self.train_folders
    else:
        folders = self.test_folders
    folders_per_batch = 10
    images = []
    labels = []
    folder_sample = np.random.choice(folders, folders_per_batch)
    for folder in folder_sample:
        folder_path = self.input_file_path + '/' + folder + '/predictors_and_targets.npz'
        npzfile = np.load(folder_path)
        images.extend(npzfile['predictors'])
        labels.extend(npzfile['targets'])
    images = np.array(images)
    labels = np.array(labels)
    images, labels = shuffle_dataset(images, labels)
    return images, labels
def get_sample(self, train=True):
    if train:
        folders = self.train_folders
    else:
        folders = self.test_folders
    folders_per_batch = 10
    images = []
    labels = []
    for _ in range(folders_per_batch):
        folder = self.get_weighted_random_folder(folders)
        folder_path = self.input_file_path + '/' + str(folder) + '/predictors_and_targets.npz'
        npzfile = np.load(folder_path)
        images.extend(npzfile['predictors'])
        labels.extend(npzfile['targets'])
        # stop early once enough records have been collected
        if len(images) > self.max_sample_records:
            images, labels = self.reduce_record_count(images, labels)
            return images, labels
    images = np.array(images)
    labels = np.array(labels)
    images, labels = shuffle_dataset(images, labels)
    return images, labels
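# get_weighted_random_folder and reduce_record_count are called above but
# never shown in these snippets. A hypothetical sketch of the sampler,
# assuming folders are weighted by a per-folder record count stored in
# self.folder_record_counts (both the attribute and the weighting scheme
# are guesses, not the project's actual code; the method would live on the
# same class as get_sample):
import numpy as np


def get_weighted_random_folder(self, folders):
    # Weight each folder by its record count so folders with more data
    # are drawn proportionally more often.
    counts = np.array([self.folder_record_counts[f] for f in folders], dtype=float)
    return np.random.choice(folders, p=counts / counts.sum())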
                default='1000')
args = vars(ap.parse_args())
data_path = args["datapath"]
batch_iterations = int(args["batches"])

input_file_path = data_path + '/data_115.npz'
tfboard_basedir = mkdir(data_path + '/tf_visual_data/runs/')
tfboard_run_dir = mkdir_tfboard_run_dir(tfboard_basedir)
model_checkpoint_path = mkdir(tfboard_run_dir + '/trained_model')

npzfile = np.load(input_file_path)

# training data
train_predictors = npzfile['train_predictors']
train_targets = npzfile['train_targets']
train_predictors, train_targets = shuffle_dataset(train_predictors, train_targets)

# validation/test data
validation_predictors = npzfile['validation_predictors']
validation_targets = npzfile['validation_targets']
validation_predictors, validation_targets = shuffle_dataset(validation_predictors, validation_targets)

sess = tf.InteractiveSession(config=tf.ConfigProto())


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
input_file_path = data_path + '/final_processed_data_3_channels.npz'
tfboard_basedir = mkdir(data_path + '/tf_visual_data/runs/')
tfboard_run_dir = mkdir_tfboard_run_dir(tfboard_basedir)
model_checkpoint_path = mkdir(tfboard_run_dir + '/trained_model')

npzfile = np.load(input_file_path)

# training data
train_predictors = npzfile['train_predictors']
train_targets = npzfile['train_targets']

# validation/test data
validation_predictors = npzfile['validation_predictors']
validation_targets = npzfile['validation_targets']
validation_predictors, validation_targets = shuffle_dataset(
    validation_predictors, validation_targets)

sess = tf.InteractiveSession(config=tf.ConfigProto())


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
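# weight_variable, bias_variable, and conv2d follow the standard TF1 helper
# pattern. A minimal sketch of how such helpers are typically composed into
# one convolutional layer; the input and kernel shapes are illustrative,
# and conv2d is assumed to wrap tf.nn.conv2d with stride 1 and SAME padding
# (its body is cut off above).
x = tf.placeholder(tf.float32, shape=[None, 240, 320, 3])  # assumed input size
W_conv1 = weight_variable([6, 6, 3, 24])  # hypothetical 6x6 kernels, 3 in-channels, 24 filters
b_conv1 = bias_variable([24])
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)  # conv + bias + ReLU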
import matplotlib.pyplot as plt
from mnist import load_mnist
from multi_layer_net import MultiLayerNet
from util import shuffle_dataset
from trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# trim the training data so results come back quickly
x_train = x_train[:500]
t_train = t_train[:500]

# split off 20% as validation data
validation_rate = 0.20
validation_num = int(x_train.shape[0] * validation_rate)
x_train, t_train = shuffle_dataset(x_train, t_train)
x_val = x_train[:validation_num]
t_val = t_train[:validation_num]
x_train = x_train[validation_num:]
t_train = t_train[validation_num:]


def __train(lr, weight_decay, epocs=50):
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay)
    trainer = Trainer(network, x_train, t_train, x_val,
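# The snippet above is cut off inside __train. In this style of
# hyperparameter search the function is usually driven by a loop that
# samples lr and weight_decay at random and records the validation curve.
# A sketch under that assumption; the sampling ranges and the
# (val_acc_list, train_acc_list) return value of __train are guesses, not
# taken from the original file.
import numpy as np

optimization_trial = 100
results_val = {}
for _ in range(optimization_trial):
    # log-uniform samples over hypothetical ranges
    weight_decay = 10 ** np.random.uniform(-8, -4)
    lr = 10 ** np.random.uniform(-6, -2)
    val_acc_list, train_acc_list = __train(lr, weight_decay)
    results_val[f"lr:{lr:.5f}, weight decay:{weight_decay:.8f}"] = val_acc_list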
import numpy as np
from nltk.tokenize import word_tokenize

from util import load_reviews, shuffle_dataset


def main(data_file):
    """extract function word features from a text file"""
    # parse the review file. Field [0] per line is the review ID. Field [-1] is the review
    # define this function in util.py
    reviews, ids = load_reviews(data_file)

    ###################### debug using just a few
    reviews = reviews[:10]
    ids = ids[:10]
    ######################
    print('\n Debug: \n ')
    print(reviews)
    print(ids)
    print('\n')
    ######################

    feature_key = ["the", "or", "and"]
    print(f"loading feature vectors for {len(reviews)} reviews")

    # For function words "the", "or" and "and", use a Python list to
    # make a count vector per review
    feature_lists = []
    for review in reviews:
        review_words = word_tokenize(review.lower())
        vec = []
        for word in feature_key:
            these_words = [w for w in review_words if w == word]
            vec.append(len(these_words))
        feature_lists.append(vec)
    print(feature_lists)

    # Create the same feature vectors as a numpy array
    feature_np = np.zeros((len(reviews), len(feature_key)), dtype=int)
    for i, review in enumerate(reviews):
        review_words = word_tokenize(review.lower())
        for j, word in enumerate(feature_key):
            these_words = [w for w in review_words if w == word]
            feature_np[i, j] = len(these_words)
    print(feature_np)

    # Cast your feature_lists to a numpy array and then verify it is equivalent to feature_np
    feature_lists_np = np.asarray(feature_lists)
    print(f'equal? {np.array_equal(feature_lists_np, feature_np)}')

    # Shuffle the list of id's and the feature matrix in unison. Then check your work
    print("ids before shuffle")
    print(ids)
    print("ids after shuffle")
    nums = np.random.permutation(len(ids))
    print(nums)
    shuffled_ids = [ids[i] for i in nums]
    print(shuffled_ids)

    print("feature matrix before shuffle")
    print(feature_np)
    print("feature matrix after shuffle")
    shuffled_feature_np = np.zeros((len(reviews), len(feature_key)), dtype=int)
    for i in range(len(reviews)):
        shuffled_feature_np[i] = feature_np[nums[i]]
    print(shuffled_feature_np)

    # define this function in util.py
    shuffled_feature_matrix, shuffled_ids = shuffle_dataset(feature_np, ids)
    print("ids before shuffle")
    print(ids)
    print("ids after shuffle")
    print(shuffled_ids)
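# One way to "check your work" after the unison shuffle: every ID should
# still map to the same feature row it had before. A small sketch, assuming
# shuffle_dataset returns the matrix and the IDs permuted together:
before = {rid: tuple(row) for rid, row in zip(ids, feature_np)}
after = {rid: tuple(row) for rid, row in zip(shuffled_ids, shuffled_feature_matrix)}
assert before == after, "shuffle broke the id/feature pairing"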