Example #1
def SaveNpy(self, entries, sel):
    # Parse the GUI entry widgets into a data path, file extension and two numeric options.
    # train_p, label_p, test_p and selfold are presumably module-level names defined
    # elsewhere in the project (training, label, test and augmented-data subfolders).
    args = self.arggen(entries)
    data_path, fext, num, cr = args[0], args[1], int(args[2]), float(args[3])
    if sel == 'trlab':
        # Prefer the augmented-data subfolder under the training directory if it exists.
        if selfold in sorted(os.listdir(os.path.join(data_path, train_p))):
            print('Augmented data found. Saving augmented data instead of original ones')
            create_train_data(data_path, os.path.join(train_p, selfold),
                              os.path.join(label_p, selfold), fext)
        else:
            create_train_data(data_path, train_p, label_p, fext)
    if sel == 'test':
        create_test_data(data_path, test_p, fext)
Example #2
# Imports assumed by this snippet; create_app, init_db and create_test_data come
# from the project under test (their module is not shown in the original excerpt).
import os
import tempfile

import pytest


@pytest.fixture  # presumably a pytest fixture, given the yield-based setup/teardown
def app():
    # Create a throwaway SQLite database file for each test run.
    db_fd, db_path = tempfile.mkstemp()

    db_path_sqlized = 'sqlite:///' + db_path
    # env_vars = {'SECRET_KEY': 'dev', 'DATABASE_URI': db_path_sqlized}
    # with patch.dict('os.environ', env_vars):
    os.environ['DATABASE_URI'] = db_path_sqlized
    app = create_app({
        'TESTING': True,
        'DATABASE_URI': db_path_sqlized,
    })

    # Initialise the schema and seed it with test data inside an app context.
    with app.app_context():
        init_db()
        create_test_data()

    yield app

    # Clean up the temporary database file after the test is done with the app.
    os.close(db_fd)
    os.unlink(db_path)
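
A minimal usage sketch for the fixture above, assuming create_app returns a Flask application; the test name and request path are hypothetical, not taken from the original project:

def test_index_page(app):
    # pytest injects the 'app' fixture defined above
    client = app.test_client()   # Flask's built-in test client
    response = client.get('/')   # hypothetical route; adjust to the app under test
    assert response.status_code < 500  # the app should at least respond without crashing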
Example #3
...
Lab3: Improve the accuracy of Lab2
Tips:
Try to change the training parameters: learning rate, number of epochs, batch size, etc.
Try to augment the image samples using the last example of https://keras.io/preprocessing/image/#imagedatagenerator (see the sketch right after this list)
Try to replace the upsampling (unpooling) layer with a deconvolution layer (ref. https://github.com/k3nt0w/FCN_via_keras )
Try to increase the receptive field by replacing Convolution2D with AtrousConvolution2D (is it the same as reducing the running resolution? see the sketch at the end of this example)
Transfer learning
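
A minimal sketch of the augmentation tip above, assuming the Keras 1 API used in this example and that imgs_train / imgs_mask_train are the arrays returned by load_train_data(); the generator arguments, batch size and seed are placeholder values:

from keras.preprocessing.image import ImageDataGenerator

data_gen_args = dict(rotation_range=10.,
                     width_shift_range=0.1,
                     height_shift_range=0.1,
                     zoom_range=0.1,
                     horizontal_flip=True)
image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

seed = 1  # the same seed keeps image and mask augmentations in sync
image_generator = image_datagen.flow(imgs_train, batch_size=32, seed=seed)
mask_generator = mask_datagen.flow(imgs_mask_train, batch_size=32, seed=seed)
train_generator = zip(image_generator, mask_generator)  # lazy in Python 3

# model.fit_generator(train_generator, samples_per_epoch=len(imgs_train), nb_epoch=20)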

from data import create_train_data, create_test_data
create_train_data()
create_test_data()

from __future__ import print_function

from scipy import misc
import numpy as np
from keras.models import Model
from keras.layers import Input, merge, Convolution2D, MaxPooling2D, UpSampling2D, AtrousConvolution2D
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras import backend as K
import os

from data import load_train_data, load_test_data

K.set_image_dim_ordering('th')  # Theano dimension ordering in this code

original_img_rows = 1024
original_img_cols = 1024
running_img_rows = 256
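
A minimal sketch of the AtrousConvolution2D tip, assuming the Keras 1 functional API imported above and Theano ('th') dimension ordering; the filter count and dilation rate are placeholder values:

inputs = Input((1, running_img_rows, running_img_rows))
conv1 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(inputs)
# A dilated (atrous) convolution uses the same 3x3 kernel sampled with gaps, so the
# receptive field grows without pooling and without lowering the running resolution.
conv2 = AtrousConvolution2D(32, 3, 3, atrous_rate=(2, 2), activation='relu',
                            border_mode='same')(conv1)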
Example #4
        max_queue_size=50,
        workers=4,
        verbose=1,
        callbacks=callbacks,
        validation_data=valid_generator(imgs_test, imgs_mask_test,
                                        netparameval),
        validation_steps=np.ceil(
            float(len(imgs_test)) / float(netparam.batch_size)))


if __name__ == "__main__":
    netparam = params.init()
    netparameval = params.init(train=0)
    # Pairs of instrument_dataset indices held out for validation in each of the
    # four cross-validation folds below.
    x = [[1, 3], [2, 5], [4, 8], [6, 7]]
    data.create_train_data(netparam)
    data.create_test_data(netparam)
    for indx in range(0, 4):
        imgs_train, imgs_mask_train = data.load_train_data()
        imgs_test, imgs_mask_test = data.load_test_data()
        np.random.seed(1234)
        Traindatagen = CustomImageDataGenerator(netparam, training=1)
        Validdatagen = CustomImageDataGenerator(netparam, training=1)
        d1 = str(x[indx][0])
        d2 = str(x[indx][1])
        ids_train = [
            i for i, s in enumerate(imgs_mask_train) if 'instrument_dataset_' +
            d1 not in s and 'instrument_dataset_' + d2 not in s
        ]
        ids_val = [
            i for i, s in enumerate(imgs_mask_test) if 'instrument_dataset_' +
            d1 in s or 'instrument_dataset_' + d2 in s
Example #5
File: main.py  Project: j-abc/vectorizer
def main(output_file_path, input_file_path, create_train_test_data, training_data_file_path, testing_data_file_path, run_ngram_train, run_ngram_test, cli):
    """Main entry point: optionally creates the train/test splits, trains and
    tests the n-gram model, and runs the interactive CLI thesaurus."""

    utilities.logger()
    log = logging.getLogger('Enron_email_analysis.main')
    log.info('Starting to run main.py.')
    start = time()

    if create_train_test_data:
        log.info('Starting to create training and testing data')
        email_data = data.read_data(create_train_test_data) # Read in data from csv or s3 using file path
        email_content = email_data['content']
        training_emails, testing_emails = train_test_split(email_content, test_size=.2, train_size=.8, shuffle=True)
        log.info('Split data sent into training and testing')
        training_emails.to_csv(f'{input_file_path}/training_email_data.csv', header=['email_text'])
        log.info(f'Successfully created training emails csv: {input_file_path}/training_email_data.csv')
        testing_emails.to_csv(f'{input_file_path}/testing_email_data.csv', header=['email_text'])
        log.info(f'Successfully created testing emails csv: {input_file_path}/testing_email_data.csv')
        data.create_test_data(testing_emails, input_file_path)
        log.info(f'Successfully created fill in the blank test csv: {input_file_path}')

    if run_ngram_train:
        log.info("Starting to train ngram model")
        training_emails = pd.read_csv(training_data_file_path)
        preprocessed_training_emails = preprocess.PreprocessText(training_emails['email_text']).preprocessed_text
        NgramTrain(preprocessed_training_emails, input_file_path)  # Train the n-gram model on the preprocessed emails
        log.info("Successfully finished training ngram model")

    if run_ngram_test:
        log.info("Starting to test ngram model")
        test_fill_in_the_blank = pd.read_csv(testing_data_file_path)
        ngram_test = NgramTest(test_fill_in_the_blank, output_file_path)
        evaluation_statistics.Evaluation(ngram_test)
        log.info("Successfully finished testing ngram model")

    if cli:

        log.info("Welcome to the Personalized Thesaurus.")
        log.info("ABOUT: This thesaurus recommends you the best word based on your previous emails and the"
                 "\nmost similar word.")
        log.info("Starting to reading in forward and backward probability pickle files")
        bigram_forward_probability = data.read_pickle_file(f'model_input_data/bigram_forward_probability.pkl')
        log.info("Successfully finished reading in 1/4 pickle files.")
        bigram_backward_probability = data.read_pickle_file(f'model_input_data/bigram_backward_probability.pkl')
        log.info("Successfully finished reading in 2/4 pickle files.")

        trigram_forward_probability = data.read_pickle_file(f'model_input_data/trigram_forward_probability.pkl')
        log.info("Successfully finished reading in 3/4 pickle files.")
        trigram_backward_probability = data.read_pickle_file(f'model_input_data/trigram_backward_probability.pkl')
        log.info("Successfully finished reading in 4/4 pickle files.")

        word_vectors = api.load("glove-wiki-gigaword-100")  # pretrained GloVe vectors via gensim's downloader API

        while True:
            log.info('Ready for user input')
            before_blank_tokens, after_blank_tokens, word_to_replace = utilities.take_input('cli')
            log.info(f'Before the word to replace: {before_blank_tokens}')
            log.info(f'After the word to replace: {after_blank_tokens}')
            after_predictions = data.predict_next_word(before_blank_tokens, bigram_forward_probability, trigram_forward_probability, 'forward')
            before_predictions = data.predict_next_word(after_blank_tokens, bigram_backward_probability, trigram_backward_probability, 'backward')
            merged_predictions = after_predictions+before_predictions
            word_embedding_output = data.get_similar_words(word_to_replace, word_vectors)
            print('Personalized Output:')
            for probability, word in merged_predictions:
                print(word + '\t' + str(probability))
            print('Similar Words:')
            for word, probability in word_embedding_output:
                print(word + '\t' + str(probability))

    end = time()
    time_difference = end - start
    with open('summary_statistics.txt', 'a') as summary_statistics:
        summary_statistics.write(f'The program ran for: {time_difference} seconds\n')