Example #1
def main(data_file):
    """extract function word features from a text file"""

    # TODO: parse the review file. Field [0] of each line is the review ID; field [-1] is the review text.
    # define this function in util.py
    reviews, ids = load_reviews(data_file)

    # debug using just a few
    reviews = reviews[:10]
    ids = ids[:10]

    feature_key = ["the", "or", "and"]

    print(f"loading feature vectors for {len(reviews)} reviews")

    # TODO: For function words "the", "or" and "and", use a Python list to
    #     make a count vector per review
    feature_lists = []

    # TODO: Create the same feature vectors as a numpy array
    feature_np = np.zeros((len(reviews), len(feature_key)), dtype=int)

    # TODO: Cast your feature_lists to a numpy array and then verify it is equivalent to feature_np

    # TODO: Shuffle the list of id's and the feature matrix in unison. Then check your work
    print(f"Shuffling data")
    #TODO: define this function in util.py
    shuffled_feature_matrix, shuffled_ids = shuffle_dataset(feature_np, ids)
    print("ids before shuffle")
    print(ids)
    print("ids after shuffle")
    print(shuffled_ids)
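
The TODOs above defer load_reviews to util.py. A minimal sketch of what that helper could look like, assuming one review per line with tab-separated fields (the separator is an assumption; the source only fixes the field positions, ID in field [0] and review text in field [-1]):

def load_reviews(data_file):
    """Parse one review per line: field [0] is the ID, field [-1] the review text."""
    reviews, ids = [], []
    with open(data_file, encoding='utf-8') as f:
        for line in f:
            fields = line.rstrip('\n').split('\t')  # tab separator is an assumption
            if len(fields) < 2:
                continue  # skip malformed lines
            ids.append(fields[0])
            reviews.append(fields[-1])
    return reviews, ids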
Example #2
def main(data_file):
    """extract function word features from a text file"""

    # TODO: parse the review file. Field [0] of each line is the review ID; field [-1] is the review text.
    # define this function in util.py
    reviews, ids = load_reviews(data_file)

    # debug using just a few
    reviews = reviews[:10]
    ids = ids[:10]

    feature_key = ["the", "or", "and"]

    print(f"loading feature vectors for {len(reviews)} reviews")

    # TODO: For function words "the", "or" and "and", use a Python list to
    #     make a count vector per review
    feature_lists = []
    for review in reviews:
        pass  # TODO: build a count vector for this review and append it to feature_lists

    # TODO: Create the same feature vectors as a numpy array
    feature_np = np.zeros((len(reviews), len(feature_key)), dtype=int)

    # TODO: Cast your feature_lists to a numpy array and then verify it is equivalent to feature_np

    # TODO: Shuffle the list of id's and the feature matrix in unison. Then check your work
    print(f"Shuffling data")
    #TODO: define this function in util.py
    shuffled_feature_matrix, shuffled_ids = shuffle_dataset(feature_np, ids)
    print("ids before shuffle")
    print(ids)
    print("ids after shuffle")
    print(shuffled_ids)



if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='feature vector lab')
    parser.add_argument('--path', type=str, default="imdb_practice.txt",
                        help='path to input with one review per line')

    args = parser.parse_args()

    main(args.path)
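
shuffle_dataset is likewise left to util.py. A minimal sketch using a single np.random.permutation, mirroring the manual unison shuffle that Example #9 below performs inline (the exact signature and return types are assumptions):

import numpy as np

def shuffle_dataset(feature_matrix, ids):
    """Shuffle a feature matrix and its parallel sequence of ids in unison."""
    order = np.random.permutation(len(ids))
    shuffled_matrix = feature_matrix[order]  # reorder rows by the permutation
    shuffled_ids = [ids[i] for i in order]   # apply the same permutation to the ids
    return shuffled_matrix, shuffled_ids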
Example #3
def get_sample(self, train=True):
    if train:
        folders = self.train_folders
    else:
        folders = self.test_folders
    folders_per_batch = 10
    images = []
    labels = []
    folder_sample = np.random.choice(folders, folders_per_batch)
    for folder in folder_sample:
        folder_path = self.input_file_path + '/' + folder + '/predictors_and_targets.npz'
        npzfile = np.load(folder_path)
        images.extend(npzfile['predictors'])
        labels.extend(npzfile['targets'])
    images = np.array(images)
    labels = np.array(labels)
    images, labels = shuffle_dataset(images, labels)
    return images, labels
Example #4
def get_sample(self, train=True):
    if train:
        folders = self.train_folders
    else:
        folders = self.test_folders
    folders_per_batch = 10
    images = []
    labels = []
    for _ in range(folders_per_batch):
        folder = self.get_weighted_random_folder(folders)
        folder_path = self.input_file_path + '/' + str(folder) + '/predictors_and_targets.npz'
        npzfile = np.load(folder_path)
        images.extend(npzfile['predictors'])
        labels.extend(npzfile['targets'])
        if len(images) > self.max_sample_records:
            images, labels = self.reduce_record_count(images, labels)
            return images, labels
    images = np.array(images)
    labels = np.array(labels)
    images, labels = shuffle_dataset(images, labels)
    return images, labels
Example #5
def get_sample(self, train=True):
    if train:
        folders = self.train_folders
    else:
        folders = self.test_folders
    folders_per_batch = 10
    images = []
    labels = []
    for _ in range(folders_per_batch):
        folder = self.get_weighted_random_folder(folders)
        folder_path = self.input_file_path + '/' + str(folder) + '/predictors_and_targets.npz'
        npzfile = np.load(folder_path)
        images.extend(npzfile['predictors'])
        labels.extend(npzfile['targets'])
        if len(images) > self.max_sample_records:
            images, labels = self.reduce_record_count(images, labels)
            return images, labels
    images = np.array(images)
    labels = np.array(labels)
    images, labels = shuffle_dataset(images, labels)
    return images, labels
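
get_weighted_random_folder and reduce_record_count are not shown in these snippets. Purely as an illustration of what the weighted choice might look like, a hypothetical standalone sketch (record_counts is invented for this example; the real weighting scheme is unknown):

import numpy as np

def weighted_folder_choice(folders, record_counts):
    # Hypothetical: weight each folder by its record count so larger
    # folders are sampled proportionally more often.
    counts = np.array([record_counts[f] for f in folders], dtype=float)
    return np.random.choice(folders, p=counts / counts.sum())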
Example #6
    default='1000')
args = vars(ap.parse_args())
data_path = args["datapath"]
batch_iterations = int(args["batches"])

input_file_path = data_path+'/data_115.npz'
tfboard_basedir = mkdir(data_path+'/tf_visual_data/runs/')
tfboard_run_dir = mkdir_tfboard_run_dir(tfboard_basedir)
model_checkpoint_path = mkdir(tfboard_run_dir+'/trained_model')

npzfile = np.load(input_file_path)

# training data
train_predictors = npzfile['train_predictors']
train_targets = npzfile['train_targets']
train_predictors, train_targets = shuffle_dataset(train_predictors, train_targets)

# validation/test data
validation_predictors = npzfile['validation_predictors']
validation_targets = npzfile['validation_targets']
validation_predictors, validation_targets = shuffle_dataset(validation_predictors, validation_targets)


sess = tf.InteractiveSession(config=tf.ConfigProto())

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
Example #7
input_file_path = data_path + '/final_processed_data_3_channels.npz'
tfboard_basedir = mkdir(data_path + '/tf_visual_data/runs/')
tfboard_run_dir = mkdir_tfboard_run_dir(tfboard_basedir)
model_checkpoint_path = mkdir(tfboard_run_dir + '/trained_model')

npzfile = np.load(input_file_path)

# training data
train_predictors = npzfile['train_predictors']
train_targets = npzfile['train_targets']

# validation/test data
validation_predictors = npzfile['validation_predictors']
validation_targets = npzfile['validation_targets']
validation_predictors, validation_targets = shuffle_dataset(
    validation_predictors, validation_targets)

sess = tf.InteractiveSession(config=tf.ConfigProto())


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
Example #8
import matplotlib.pyplot as plt
from mnist import load_mnist
from multi_layer_net import MultiLayerNet
from util import shuffle_dataset
from trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# use a reduced training set to get results quickly
x_train = x_train[:500]
t_train = t_train[:500]

# split off 20% as validation data
validation_rate = 0.20
validation_num = int(x_train.shape[0] * validation_rate)
x_train, t_train = shuffle_dataset(x_train, t_train)
x_val = x_train[:validation_num]
t_val = t_train[:validation_num]
x_train = x_train[validation_num:]
t_train = t_train[validation_num:]


def __train(lr, weight_decay, epocs=50):
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay)
    trainer = Trainer(network,
                      x_train,
                      t_train,
                      x_val,
Example #9
def main(data_file):
    """extract function word features from a text file"""
    # TODO: parse the review file. Field [0] of each line is the review ID; field [-1] is the review text.
    # define this function in util.py
    reviews, ids = load_reviews(data_file)

    ###################### debug using just a few
    reviews = reviews[:10]
    ids = ids[:10]

    ######################
    print('\n Debug: \n ')
    print(reviews)
    print(ids)
    print('\n')
    ######################

    feature_key = ["the", "or", "and"]
    print(f"loading feature vectors for {len(reviews)} reviews")

    # For function words "the", "or" and "and", use a Python list to
    # make a count vector per review
    feature_lists = []
    for review in reviews:
        review_words = word_tokenize(review.lower())
        vec = []
        for word in feature_key:
            these_words = [w for w in review_words if w == word]
            vec.append(len(these_words))
        feature_lists.append(vec)
    print(feature_lists)

    # Create the same feature vectors as a numpy array
    feature_np = np.zeros((len(reviews), len(feature_key)), dtype=int)
    for i, review in enumerate(reviews):
        review_words = word_tokenize(review.lower())
        for j, word in enumerate(feature_key):
            these_words = [w for w in review_words if w == word]
            feature_np[i, j] = len(these_words)
    print(feature_np)

    # Cast your feature_lists to a numpy array and then verify it is equivalent to feature_np
    feature_lists_np = np.asarray(feature_lists)
    print(f'equal? {np.array_equal(feature_lists_np, feature_np)}')

    # Shuffle the list of id's and the feature matrix in unison. Then check your work
    print("ids before shuffle")
    print(ids)

    print("ids after shuffle")
    nums = np.random.permutation(len(ids))
    print(nums)
    shuffled_ids = [ids[i] for i in nums]
    print(shuffled_ids)

    print("feature matrix before shuffle")
    print(feature_np)

    print("feature matrix after shuffle")
    shuffled_feature_np = np.zeros((len(reviews), len(feature_key)), dtype=int)
    for i in range(len(reviews)):
        shuffled_feature_np[i] = feature_np[nums[i]]
    print(shuffled_feature_np)

    # define this function in util.py
    shuffled_feature_matrix, shuffled_ids = shuffle_dataset(feature_np, ids)
    print("ids before shuffle")
    print(ids)
    print("ids after shuffle")
    print(shuffled_ids)
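
One way to "check your work" after the shuffle, as the TODO asks: pair each ID with its feature row before shuffling and confirm every pair survives (a sketch assuming unique ids, with feature_np, ids, and shuffle_dataset as above):

before = {rid: tuple(row) for rid, row in zip(ids, feature_np)}
shuffled_feature_matrix, shuffled_ids = shuffle_dataset(feature_np, ids)
after = {rid: tuple(row) for rid, row in zip(shuffled_ids, shuffled_feature_matrix)}
# only the order may change, never the id-to-row pairing
assert before == after, "ids and feature rows were not shuffled in unison"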