def SaveNpy(self, entries, sel):
    """Convert the raw image folders selected by ``sel`` into .npy archives.

    ``entries`` is forwarded to ``self.arggen`` to obtain the run arguments;
    ``sel`` chooses the conversion: 'trlab' builds the training/label data
    (preferring an augmented subfolder when one exists), 'test' builds the
    test data.
    """
    parsed = self.arggen(entries)
    data_path = parsed[0]
    fext = parsed[1]
    num = int(parsed[2])    # converted but not used below; conversion validates the field
    cr = float(parsed[3])   # converted but not used below; conversion validates the field

    if sel == 'trlab':
        # NOTE(review): `selfold` is resolved from enclosing scope here —
        # possibly a typo for `self.fold`; confirm against the full file.
        train_dir = os.path.join(data_path, train_p)
        if selfold in sorted(os.listdir(train_dir)):
            print('Augmented data found. Saving augmented data instead of original ones')
            create_train_data(data_path,
                              os.path.join(train_p, selfold),
                              os.path.join(label_p, selfold),
                              fext)
        else:
            create_train_data(data_path, train_p, label_p, fext)

    if sel == 'test':
        create_test_data(data_path, test_p, fext)
def app():
    """Yield a Flask app wired to a throwaway SQLite database.

    Creates a temp file, points ``DATABASE_URI`` at it, builds the app in
    TESTING mode, initialises the schema and seed data, then yields the app.
    The temp database is always closed and deleted afterwards.
    """
    db_fd, db_path = tempfile.mkstemp()
    db_path_sqlized = 'sqlite:///' + db_path
    # The app factory also reads the URI from the environment.
    os.environ['DATABASE_URI'] = db_path_sqlized
    app = create_app({
        'TESTING': True,
        'DATABASE_URI': db_path_sqlized,
    })
    with app.app_context():
        init_db()
        create_test_data()
    try:
        yield app
    finally:
        # Guarantee cleanup even when the consuming test raises.
        os.close(db_fd)
        os.unlink(db_path)
... Lab3 Improve the accuracy of Lab2 Tips: Try to change the training parameters: learning rate, epoch, batch size, etc. Try to augment the image samples using the last example of https://keras.io/preprocessing/image/#imagedatagenerator Try to replace the uppooling layer with deconvolution layer (ref. https://github.com/k3nt0w/FCN_via_keras ) Try to increase receptive fields by replace convolution2D with AtrousConvolution2D (is it same as reduce running resolution?) Transfer learning from data import create_train_data, create_test_data create_train_data() create_test_data() from __future__ import print_function from scipy import misc import numpy as np from keras.models import Model from keras.layers import Input, merge, Convolution2D, MaxPooling2D, UpSampling2D, AtrousConvolution2D from keras.optimizers import Adam from keras.callbacks import ModelCheckpoint, LearningRateScheduler from keras import backend as K import os from data import load_train_data, load_test_data K.set_image_dim_ordering('th') # Theano dimension ordering in this code original_img_rows = 1024 original_img_cols = 1024 running_img_rows = 256
max_queue_size=50, workers=4, verbose=1, callbacks=callbacks, validation_data=valid_generator(imgs_test, imgs_mask_test, netprameval), validation_steps=np.ceil( float(len(imgs_test)) / float(netpram.batch_size))) if __name__ == "__main__": netparam = params.init() netparameval = params.init(train=0) x = [[1, 3], [2, 5], [4, 8], [6, 7]] data.create_train_data(netparam) data.create_test_data(netparam) for indx in range(0, 4): imgs_train, imgs_mask_train = data.load_train_data() imgs_test, imgs_mask_test = data.load_test_data() np.random.seed(1234) Traindatagen = CustomImageDataGenerator(netparam, training=1) Validdatagen = CustomImageDataGenerator(netparam, training=1) d1 = str(x[indx][0]) d2 = str(x[indx][1]) ids_train = [ i for i, s in enumerate(imgs_mask_train) if 'instrument_dataset_' + d1 not in s and 'instrument_dataset_' + d2 not in s ] ids_val = [ i for i, s in enumerate(imgs_mask_test) if 'instrument_dataset_' + d1 in s or 'instrument_dataset_' + d2 in s
def main(output_file_path, input_file_path, create_train_test_data,
         training_data_file_path, testing_data_file_path, run_ngram_train,
         run_ngram_test, cli):
    """Entry point driving the Enron e-mail n-gram pipeline.

    Each stage is gated by its own flag argument:
      create_train_test_data -- if truthy, split the raw corpus into
          training/testing CSVs under ``input_file_path``; the value itself
          is also passed to ``data.read_data`` (NOTE(review): it appears to
          double as the input-data location -- confirm with the CLI wiring).
      run_ngram_train -- train the n-gram model from ``training_data_file_path``.
      run_ngram_test -- evaluate the model on ``testing_data_file_path``.
      cli -- start an interactive fill-in-the-blank loop (never returns;
          see the ``while True`` below).

    Side effects: writes CSVs, reads pickled probability tables, and appends
    a run-time line to ``summary_statistics.txt``.
    """
    utilities.logger()
    log = logging.getLogger('Enron_email_analysis.main')
    log.info('Starting to run main.py.')
    start = time()  # wall-clock start for the summary written at the end
    if create_train_test_data:
        log.info('Starting to create training and testing data')
        email_data = data.read_data(create_train_test_data)  # Read in data from csv or s3 using file path
        email_content = email_data['content']
        # 80/20 shuffled split of the raw e-mail bodies.
        training_emails, testing_emails = train_test_split(email_content, test_size=.2, train_size=.8, shuffle=True)
        log.info('Split data sent into training and testing')
        training_emails.to_csv(f'{input_file_path}/training_email_data.csv', header=['email_text'])
        log.info(f'Successfully created training emails csv: {input_file_path}/training_email_data.csv')
        testing_emails.to_csv(f'{input_file_path}/testing_email_data.csv', header=['email_text'])
        log.info(f'Successfully created testing emails csv: {input_file_path}/testing_email_data.csv')
        # Build the fill-in-the-blank evaluation set from the held-out e-mails.
        data.create_test_data(testing_emails, input_file_path)
        log.info(f'Successfully created fill in the blank test csv: {input_file_path}')
    if run_ngram_train:
        log.info("Starting to train ngram model")
        training_emails = pd.read_csv(training_data_file_path)
        preprocessed_training_emails = preprocess.PreprocessText(training_emails['email_text']).preprocessed_text
        NgramTrain(preprocessed_training_emails, input_file_path)  # fits the n-gram model on the preprocessed text
        log.info("Successfully finished training ngram model")
    if run_ngram_test:
        log.info("Starting to test ngram model")
        test_fill_in_the_blank = pd.read_csv(testing_data_file_path)
        ngram_test = NgramTest(test_fill_in_the_blank, output_file_path)
        evaluation_statistics.Evaluation(ngram_test)
        log.info("Successfully finished testing ngram model")
    if cli:
        log.info("Welcome to the Personalized Thesaurus.")
        log.info("ABOUT: This thesaurus recommends you the best word based on your previous emails and the"
                 "\nmost similar word.")
        log.info("Starting to reading in forward and backward probability pickle files")
        # Pre-trained n-gram probability tables (produced by the training stage).
        bigram_forward_probability = data.read_pickle_file(f'model_input_data/bigram_forward_probability.pkl')
        log.info("Successfully finished reading in 1/4 pickle files.")
        bigram_backward_probability = data.read_pickle_file(f'model_input_data/bigram_backward_probability.pkl')
        log.info("Successfully finished reading in 2/4 pickle files.")
        trigram_forward_probability = data.read_pickle_file(f'model_input_data/trigram_forward_probability.pkl')
        log.info("Successfully finished reading in 3/4 pickle files.")
        trigram_backward_probability = data.read_pickle_file(f'model_input_data/trigram_backward_probability.pkl')
        log.info("Successfully finished reading in 4/4 pickle files.")
        # Pre-trained GloVe embeddings backing the "similar words" suggestions.
        word_vectors = api.load("glove-wiki-gigaword-100")
        # Interactive loop: never breaks, so once the CLI is entered nothing
        # after this `if` body runs (NOTE(review): the summary block at the
        # end of main is unreachable in CLI mode -- confirm this is intended).
        while True:
            log.info('Ready for user input')
            before_blank_tokens, after_blank_tokens, word_to_replace = utilities.take_input('cli')
            log.info(f'Before the word to replace: {before_blank_tokens}')
            log.info(f'After the word to replace: {after_blank_tokens}')
            # Predict candidates from both directions and pool them.
            after_predictions = data.predict_next_word(before_blank_tokens, bigram_forward_probability, trigram_forward_probability, 'forward')
            before_predictions = data.predict_next_word(after_blank_tokens, bigram_backward_probability, trigram_backward_probability, 'backward')
            merged_predictions = after_predictions+before_predictions
            word_embedding_output = data.get_similar_words(word_to_replace, word_vectors)
            print(f'Personalized Output:')
            for probability, word in merged_predictions:
                print(word + '\t' + str(probability))
            print(f'Similar Words:')
            for word, probability in word_embedding_output:
                print(word + '\t' + str(probability))
    end = time()
    time_difference = end - start
    # Append (not overwrite) the elapsed time for this run.
    summary_statistics = open(f'summary_statistics.txt', 'a')
    summary_statistics.write(f'The program ran for: {time_difference}\n')
    summary_statistics.close()