def analyse_subject(subject, paramspath, pipesdir, resultsdir):
    # Load analysis parameters and fetch this subject's data
    yml = load_yml(paramspath)
    data_params, analysis_params = parse_yml_params(yml)
    data_params['subject'] = subject
    X, y = get_data(data_params)

    # Build one pipeline per pipeline_* YAML file
    pipelines = OrderedDict()
    pipepaths = sorted(glob.glob(pipesdir + 'pipeline_*'))
    for i, ymlpath in enumerate(pipepaths):
        yml = load_yml(ymlpath)
        label = yml['label']
        pipelines[label] = parse_yml_pipeline(yml)

    print ''
    print 'Processing ' + str(len(pipepaths)),
    print 'pipelines for subject ' + str(subject)
    print ''

    scores = crossvalidation(X, y, pipelines, analysis_params)

    # Persist one score file per pipeline under resultsdir/subject<N>/
    resultsdir = resultsdir + 'subject' + str(subject) + '/'
    if not os.path.exists(resultsdir):
        os.makedirs(resultsdir)
    for score, rstpath in zip(scores, pipepaths):
        rstpath = rstpath.split('/')[-1]
        rstpath = string.join([rstpath.split('.')[0]] + ['pkl'], '.')
        rstpath = resultsdir + rstpath
        joblib.dump(score, rstpath)
def get_features(regenerate=True):
    if regenerate:
        agg, log, flg = get_data()
        features = flg.loc[:, 'USRID':'USRID']
        all_user_id = flg.loc[:, 'USRID':'USRID']
        feature_types = list()
        for feature in FEATURE_LIST:
            print(feature[0])
            feature_val, feature_type = feature[1](agg, log, all_user_id)
            features = pd.merge(features, feature_val, on=['USRID'], how='left')
            feature_types += feature_type
        features.to_csv('./feature/features.csv')
        with open('./feature/feature_types', 'wb') as f:
            pickle.dump(feature_types, f)
        flg.to_csv('./feature/flg.csv')
    else:
        features = pd.read_csv('./feature/features.csv', index_col=0)
        with open('./feature/feature_types', 'rb') as f:
            feature_types = pickle.load(f)
        flg = pd.read_csv('./feature/flg.csv', index_col=0)
    features = features.reset_index(drop=True)
    flg = flg.reset_index(drop=True)
    train_features = features[flg['FLAG'] != -1]
    test_features = features[flg['FLAG'] == -1]
    train_flg = flg[flg['FLAG'] != -1]
    test_flg = flg[flg['FLAG'] == -1]
    train = [train_features, train_flg]
    test = [test_features, test_flg]
    return train, test, feature_types
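# A hypothetical usage sketch for get_features(): regenerate=False reuses
# the cached files under ./feature/ written by an earlier regenerate=True run.
train, test, feature_types = get_features(regenerate=False)
train_features, train_flg = train
test_features, test_flg = test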
def gen_data():
    # texts and labels are module-level lists in the original script
    label_map = {'none': 0, 'racism': 1, 'sexism': 2}
    tweet_data = get_data()
    for tweet in tweet_data:
        texts.append(tweet['text'].lower())
        labels.append(label_map[tweet['label']])
    print('Found %s texts. (samples)' % len(texts))
def main_fast_text():
    # texts, labels and label_map are module-level in the original script
    # (cf. gen_data above)
    tweet_data = get_data()
    for tweet in tweet_data:
        texts.append(tweet['text'])
        labels.append(label_map[tweet['label']])
    print('Found %s texts. (samples)' % len(texts))

    EMBEDDING_DIM = 25
    GLOVE_MODEL_FILE = "glove.twitter.27B.25d.txt"

    tokenizer = "glove"
    if tokenizer == "glove":
        TOKENIZER = glove_tokenize
    elif tokenizer == "nltk":
        TOKENIZER = tokenize_nltk.casual.TweetTokenizer(
            strip_handles=True, reduce_len=True).tokenize

    word2vec_model = gensim.models.KeyedVectors.load_word2vec_format(GLOVE_MODEL_FILE)

    tweets = select_tweets(TOKENIZER, word2vec_model)
    gen_vocab(TOKENIZER, tweets)
    X, y = gen_sequence(TOKENIZER, tweets)

    MAX_SEQUENCE_LENGTH = max(map(lambda x: len(x), X))
    print("max seq length is %d" % (MAX_SEQUENCE_LENGTH))

    data = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
    y = np.array(y)
    W = get_embedding_weights(EMBEDDING_DIM, word2vec_model)
    data, y = sklearn.utils.shuffle(data, y)

    model = fast_text_model(data.shape[1], EMBEDDING_DIM)
    _ = train_fasttext(data, y, model, EMBEDDING_DIM, W)
    table = model.layers[0].get_weights()[0]
    pdb.set_trace()
def test(dataset, ckpt):
    """
    Test the model on the held-out test set
    **input: **
        *dataset: (String) Dataset folder to use
        *ckpt: (String) Path to the ckpt file to restore
    """
    # Load the mapping from class id to sign name
    with open("signnames.csv", "r") as f:
        signnames = f.read()
    id_to_name = {
        int(line.split(",")[0]): line.split(",")[1]
        for line in signnames.split("\n")[1:] if len(line) > 0
    }

    # Get the test dataset and scale pixel values to [0, 1]
    _, _, _, _, X_test, y_test = get_data(dataset)
    X_test = X_test / 255

    model = ModelTrafficSign("TrafficSign", output_folder=None)
    # Load the model
    model.load(ckpt)

    # Evaluate on the whole dataset
    loss, acc, predicted_class = model.evaluate_dataset(X_test, y_test)
    print("Accuracy = ", acc)
    print("Loss = ", loss)

    # Get the confusion matrix and dump it to disk
    cnf_matrix = confusion_matrix(y_test, predicted_class)
    np.savetxt("cnf.txt", cnf_matrix)
def select_tweet_frame(filename):
    if filename == 'tokenized_tweets_train.txt':
        train_tweets = get_data('tokenized_tweets_train.txt')
    elif filename == 'tokenized_tweets_test.txt':
        test_tweets = get_data('tokenized_tweets_test.txt')

    tweet_return = []
    if filename == 'tokenized_tweets_train.txt':
        with open('frames.txt', 'r') as frames:
            for tweet, frame in zip(train_tweets, frames):
                tweet_return.append((tweet, frame.strip()))
        print('Tweets selected:', len(tweet_return))
        return tweet_return
    else:
        with open('frames_test.txt', 'r') as frames:
            for tweet, frame in zip(test_tweets, frames):
                tweet_return.append((tweet, frame.strip()))
        print('Tweets selected:', len(tweet_return))
        return tweet_return
def select_tweets():
    # selects the tweets as in mean_glove_embedding method
    tweet_return_file = "cnn_tweets.pickle"

    # Load if a pickled file is available
    try:
        tweet_return = pickle.load(open(tweet_return_file, "rb"))
        print "Tweets loaded from pickled file."
    # Create and save otherwise
    except (OSError, IOError) as e:
        print "Loading tweets with embeddings available..."
        tweets = get_data()
        tweet_return = []
        for tweet in tweets:
            _emb = 0
            words = TOKENIZER(tweet['text'].lower())
            for w in words:
                if w in word2vec_model:  # Check if the embedding is in the GloVe model
                    _emb += 1
            if _emb:  # Not a blank tweet
                tweet_return.append(tweet)
        pickle.dump(tweet_return, open(tweet_return_file, "wb"))

    print 'Tweets selected:', len(tweet_return)
    return tweet_return
def save_selected_feature_results_to_sql(selected_feature_sets):
    name, features = selected_feature_sets
    full_feature_set = models.FEATURE_SETS
    new_feature_set = ['none']
    classifiers = models.CLASSIFIERS
    prefix = "results_%s" % name
    unselected_feature_sets = [f for f in full_feature_set if f not in features]

    if "halves" in features:
        polynomial_terms = feature_set_list.halves_features()
    else:
        polynomial_terms = None

    to_drop = []
    for feature_set in unselected_feature_sets:
        if feature_set == "cfg":
            to_drop += feature_set_list.cfg_features()
        elif feature_set == "syntactic_complexity":
            to_drop += feature_set_list.syntactic_complexity_features()
        elif feature_set == "psycholinguistic":
            to_drop += feature_set_list.psycholinguistic_features()
        elif feature_set == "vocabulary_richness":
            to_drop += feature_set_list.vocabulary_richness_features()
        elif feature_set == "repetitiveness":
            to_drop += feature_set_list.repetitiveness_features()
        elif feature_set == "acoustics":
            to_drop += feature_set_list.acoustics_features()
        elif feature_set == "demographic":
            to_drop += feature_set_list.demographic_features()
        elif feature_set == "parts_of_speech":
            to_drop += feature_set_list.parts_of_speech_features()
        elif feature_set == "information_content":
            to_drop += feature_set_list.information_content_features()
        elif feature_set == "strips":
            to_drop += feature_set_list.strips_features()
        elif feature_set == "halves":
            to_drop += feature_set_list.halves_features()
        elif feature_set == "quadrant":
            to_drop += feature_set_list.quadrant_features()

    for feature_set in new_feature_set:
        print 'Saving features: %s' % name
        X, y, labels = data_handler.get_data(drop_features=to_drop,
                                             polynomial_terms=polynomial_terms)
        print "Number of features used: ", len(X.values[0])
        trained_models = {
            model: DementiaCV(classifiers[model], X=X, y=y,
                              labels=labels).train_model('default')
            for model in classifiers
        }
        save_models_to_sql_helper(trained_models, prefix)
def driver(classifier):
    print(getTitle(classifier))
    if classifier == 4:
        trainX, trainY, testX, testY = data_handler.splitData2TestTrain(
            'ATNTFaceImages400.txt', 10, '1:10')
        print("\nAverage Accuracy for 5 folds: %s" %
              SVM.cross_validate(trainX, trainY, testX, testY))
    else:
        data, indexes = data_handler.get_data("ATNTFaceImages400.txt")
        print("\nAverage Accuracy for 5 folds: %s" %
              cross_validator(5, data, indexes, classifier))
def callback():
    # Auth Step 4: Requests refresh and access tokens
    global auth_token, post_request

    # Check to see if we already received authorization
    if auth_token is None:
        auth_token = request.args['code']
    code_payload = {
        "grant_type": "authorization_code",
        "code": str(auth_token),
        "redirect_uri": REDIRECT_URI,
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
    }
    if post_request is None:
        post_request = requests.post(SPOTIFY_TOKEN_URL, data=code_payload)

    # Auth Step 5: Tokens are returned to the application
    response_data = json.loads(post_request.text)
    access_token = response_data["access_token"]
    refresh_token = response_data["refresh_token"]
    token_type = response_data["token_type"]
    expires_in = response_data["expires_in"]

    dh.get_data(access_token, TIME_RANGE)

    # Retrieve and populate the datasets to display on the page
    top_track_names = dh.get_top_track_names()
    top_artist_names = dh.get_top_artist_names()
    top_artist_image = dh.get_top_artist_image()
    genres_data = dh.get_top_genres_data()

    viz.create_top_genres_pie_chart(genres_data, GENRE_PIE_CHART_FILE_PATH)
    viz.create_acoustic_vs_non_acoustic_pie_chart(
        dh.get_acoustic_data(), ACOUSTIC_PIE_CHART_FILE_PATH)
    viz.create_live_vs_studio_pie_chart(dh.get_live_data(),
                                        LIVE_PIE_CHART_FILE_PATH)

    top_genres = genres_data['top_50_genres_list']
    return render_template('stat-query.html',
                           artists=top_artist_names,
                           tracks=top_track_names,
                           top_artist_image=top_artist_image,
                           genres=top_genres)
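# Module-level names the callback above relies on. SPOTIFY_TOKEN_URL is
# Spotify's documented OAuth token endpoint; every other value below is a
# placeholder, not the original project's configuration.
SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token"
CLIENT_ID = "<your-client-id>"                    # placeholder
CLIENT_SECRET = "<your-client-secret>"            # placeholder
REDIRECT_URI = "http://localhost:8080/callback"   # placeholder
TIME_RANGE = "medium_term"  # Spotify accepts short_term, medium_term or long_term
auth_token = None           # set once the authorization code arrives
post_request = None         # caches the token-exchange response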
def __init__(self):
    print(os.path.join(config.out_dir, "171214_1.txt"))
    self.visualizer = visualization.VisualizerOpencv()
    self.position_data = data_handler.get_data(
        os.path.join(config.out_dir, "171214_1.txt"))
    self.face_aligner = FaceAlignment(LandmarksType._3D, device='cuda:0',
                                      flip_input=True)
    self.positions = {}
    self.s_frames = utils.load_seq_video()
    self.data = {name: [] for name in self.position_data}
    self.cur_img = None
def select_tweets():
    # selects the tweets as in mean_glove_embedding method
    train_tweets, test_tweets = get_data()
    tweet_return = []
    for tweet in train_tweets:
        _emb = 0
        words = TOKENIZER(tweet['text'].lower())
        for w in words:
            if w in word2vec_model:  # Check if the embedding is in the GloVe model
                _emb += 1
        if _emb:  # Not a blank tweet
            tweet_return.append(tweet)
    return tweet_return, test_tweets
def select_tweets(filename):
    # selects the tweets as in mean_glove_embedding method
    if filename == 'tokenized_tweets_train.txt':
        train_tweets = get_data('tokenized_tweets_train.txt')
    elif filename == 'tokenized_tweets_test.txt':
        test_tweets = get_data('tokenized_tweets_test.txt')

    tweet_return = []
    if filename == 'tokenized_tweets_train.txt':
        c = 1
        for tweet in train_tweets:
            _emb = 0
            words = glove_tokenize(tweet['text'].lower())
            for w in words:
                if w in word2vec_model:  # Check if the embedding is in the GloVe model
                    _emb += 1
            c = c + 1
            # if _emb:  # Not a blank tweet -- check disabled, so every tweet is kept
            tweet_return.append(tweet)
        print('Tweets selected:', len(tweet_return))
        # pdb.set_trace()
        return tweet_return
    else:
        c = 1
        for tweet in test_tweets:
            _emb = 0
            words = glove_tokenize(tweet['text'].lower())
            for w in words:
                if w in word2vec_model:  # Check if the embedding is in the GloVe model
                    _emb += 1
            c = c + 1
            # if _emb:  # Not a blank tweet -- check disabled, so every tweet is kept
            tweet_return.append(tweet)
        print('Tweets selected:', len(tweet_return))
        # pdb.set_trace()
        return tweet_return
def select_tweets_whose_embedding_exists():
    # selects the tweets as in mean_glove_embedding method
    tweets = get_data()
    X, Y = [], []
    tweet_return = []
    for tweet in tweets:
        _emb = 0
        words = TOKENIZER(tweet['text'].lower())
        for w in words:
            if w in word2vec_model:  # Check if the embedding is in the GloVe model
                _emb += 1
        if _emb:  # Not a blank tweet
            tweet_return.append(tweet)
    print('Tweets selected:', len(tweet_return))
    return tweet_return
def getAbusiveFeatures():
    # Load the abusive-word dictionary
    m = {}
    with open('abusive_dict.txt', 'r') as f:
        for line in f:
            m[line.strip()] = True

    # Count abusive words per tweet
    tweets = get_data()
    X = []
    for tweet in tweets:
        text = glove_tokenize(tweet['text'].lower())
        c = 0
        for word in text:
            if word in m:
                c = c + 1
        X.append(c)
    return np.array(X)
def calculate_build_data():
    breed, height, mass = dta.get_data()
    model, params = fit.fit_curve(height, mass)
    height_curve, mass_curve = model.to_points(params, height)

    # Arc length along the fitted curve, in variance-scaled units
    height_scale = np.var(height)
    mass_scale = np.var(mass)
    d_path = np.sqrt((np.diff(height_curve)**2) / height_scale +
                     (np.diff(mass_curve)**2) / mass_scale)
    path_position = np.cumsum(d_path)

    distance, size_indices = fit.distance_to_curve(height, mass,
                                                   height_curve, mass_curve)
    body_size = path_position[size_indices - 1]
    build = 10 * distance / np.sqrt(body_size)

    # Individuals lighter than the curve predicts get a negative build score
    estimated_mass = model.evaluate(params, height)
    i_negative = np.where(mass < estimated_mass)[0]
    build[i_negative] = -build[i_negative]
    return body_size, build, breed
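# A minimal usage sketch, assuming get_data() returns NumPy-compatible
# arrays: build is signed, so its sign splits the population around the
# fitted height-mass curve.
body_size, build, breed = calculate_build_data()
breed = np.asarray(breed)
heavier_than_expected = breed[build > 0]  # above the fitted curve
lighter_than_expected = breed[build < 0]  # below the fitted curve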
def select_tweets(tokenizer, word2vec_model):
    # selects the tweets as in mean_glove_embedding method
    tweets = get_data()
    X, Y = [], []
    tweet_return = []
    for tweet in tweets:
        _emb = 0
        words = tokenizer(tweet['text'].lower())
        for w in words:
            if w in word2vec_model:  # Check if the embedding is in the GloVe model
                _emb += 1
        if _emb:  # Not a blank tweet
            tweet_return.append(tweet)
    print('Tweets selected:', len(tweet_return))
    # pdb.set_trace()
    return tweet_return
def select_tweets_whose_embedding_exists():
    # selects the tweets as in mean_glove_embedding method.
    # Only checks whether an embedding exists for at least one word within
    # the tweet; if it does, the tweet is accepted.
    tweets = get_data()
    X, Y = [], []
    tweet_return = []
    for tweet in tweets:
        _emb = 0
        words = TOKENIZER(tweet['text'].lower())
        for w in words:
            if w in word2vec_model:  # Check if the embedding is in the GloVe model
                _emb += 1
        if _emb:  # Not a blank tweet
            tweet_return.append(tweet)
    print 'Tweets selected:', len(tweet_return)
    return tweet_return
def select_tweets_whose_embedding_exists():
    # selects the tweets as in mean_glove_embedding method
    tweets = get_data()
    X, Y = [], []
    tweet_return = []
    for tweet in tweets:
        _emb = 0
        # words = glove_tokenize(tweet['text'])
        text = tweet['text'].encode("utf-8")
        words = glove_tokenize(text)
        for w in words:
            if w in vocab_json:  # Check if the embedding is in the GloVe vocabulary
                _emb += 1
        if _emb:  # Not a blank tweet
            tweet_return.append(tweet)
    print 'Tweets selected:', len(tweet_return)
    # pdb.set_trace()
    return tweet_return
def get_tfidf_features():
    # get_data() returns a list of tweets; each is a dict with keys
    # 'text', 'label' and 'user'
    tweets = get_data()
    y_map = {
        'none': 0,
        'racism': 1,
        'sexism': 2
    }
    X, y = [], []
    for tweet in tweets:
        text = glove_tokenize(tweet['text'].lower())  # tokenizing, e.g. converting '#' into <hashtag>
        text = ' '.join([c for c in text if c not in punctuation])  # removing punctuation
        X.append(text)
        y.append(y_map[tweet['label']])
    tfidf_transformer = TfidfVectorizer(ngram_range=(1, 2), analyzer='word',
                                        stop_words='english', max_features=5000)
    X_tfidf = tfidf_transformer.fit_transform(X)
    print(X_tfidf.shape)
    return X_tfidf, np.array(y)
def get_tfidf_features():
    y_map = {'none': 0, 'racism': 1, 'sexism': 2}
    tweets = get_data()
    X, y = [], []
    for tweet in tweets:
        text = glove_tokenize(tweet['text'].lower())
        text = ' '.join([c for c in text if c not in punctuation])
        # Keep only the 'sexism' class
        if y_map[tweet['label']] == 2:
            X.append(text)
            y.append(y_map[tweet['label']])
    tfidf_transformer = TfidfVectorizer(ngram_range=(1, 2), analyzer='word',
                                        stop_words='english', max_features=2000)
    X_tfidf = tfidf_transformer.fit_transform(X)
    print(X_tfidf.shape)
    get_top_features(tfidf_transformer)
    return X_tfidf, np.array(y)
async def handleGet(request):
    try:
        request_type = request.rel_url.query['type']
    except KeyError:
        return web.Response(text="You must send the parameter type",
                            status=406)
    try:
        data = get_data(request_type)
        if data == -1:
            return web.Response(
                text="You sent an invalid type parameter. "
                     "Valid types: feminino, masculino, acessorio, all",
                status=406)
    except Exception as e:
        print(str(e))
        return web.Response(text="Something went wrong. Please try later",
                            status=500)
    return web.json_response(body=data)
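# One way to wire the handler into an aiohttp application; the route path
# '/products' is a guess, but web.Application, add_get and run_app are
# standard aiohttp API.
from aiohttp import web

app = web.Application()
app.router.add_get('/products', handleGet)

if __name__ == '__main__':
    web.run_app(app)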
def test(dataset, ckpt):
    """
    Test the model on the held-out test set
    **input: **
        *dataset: (String) Dataset folder to use
        *ckpt: (String) Path to the ckpt file to restore
    """
    # Load the mapping from class id to sign name
    with open("signnames.csv", "r") as f:
        signnames = f.read()
    id_to_name = {
        int(line.split(",")[0]): line.split(",")[1]
        for line in signnames.split("\n")[1:] if len(line) > 0
    }

    # Get the test dataset and scale pixel values to [0, 1]
    _, _, _, _, X_test, y_test = get_data(dataset)
    X_test = X_test / 255

    model = ModelTrafficSign("TrafficSign", output_folder=None)
    # Load the model
    model.load(ckpt)

    # Evaluate on the whole dataset
    loss, acc, predicted_class = model.evaluate_dataset(X_test, y_test)
    print("Accuracy = ", acc)
    print("Loss = ", loss)

    # Get and plot the confusion matrix
    cnf_matrix = confusion_matrix(y_test, predicted_class)
    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=[str(i) for i in range(43)],
                          title='Confusion matrix, without normalization')
    plt.show()
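# Hypothetical invocation; the dataset folder and checkpoint path are
# placeholders for wherever train() stored its outputs.
test("./dataset", "./outputs/TrafficSign/TrafficSign.ckpt")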
def getAbusiveFeatures():
    y_map = {
        'none': 0,
        'racism': 1,
        'sexism': 2
    }
    # Load the abusive-word dictionary
    m = {}
    with open('abusive_dict.txt', 'r') as f:
        for line in f:
            m[line.strip()] = True

    tweets = get_data()
    X, y = [], []
    for tweet in tweets:
        text = glove_tokenize(tweet['text'].lower())  # does it correct spelling as well?
        # Count abusive words in the tweet
        c = 0
        for word in text:
            if word in m:
                c = c + 1
        X.append([c])
        y.append(y_map[tweet['label']])
    return np.array(X), np.array(y)
def get_liwc_features_from_text():
    filenames = glob.glob("./LIWC_features/*.csv")
    print(filenames)
    y_map = {
        'none': 0,
        'racism': 1,
        'sexism': 2
    }
    tweets = get_data()
    X, y = [], []

    # Build a dict mapping each LIWC file to the set of words it lists
    features_dict = {}
    for file in filenames:
        m = {}
        with open(file, 'r') as f:
            for line in f:
                m[line.strip()] = True
        features_dict[file] = m

    for tweet in tweets:
        text = glove_tokenize(tweet['text'].lower())
        features = []
        for file in filenames:
            c = 1  # note: the count starts at 1, not 0
            for word in text:
                if any([word.startswith(s) for s in features_dict[file]]):
                    c = c + 1
            features.append(c)
        X.append(features)
        y.append(y_map[tweet['label']])

    # Normalise the results per column
    X = np.array(X)
    X = (X - X.mean(axis=0)) / X.std(axis=0)
    return X, np.array(y)
learning_rate = 0.001
batch_size = 64
num_epochs = 100

# Get pretrained GloVe weights
pretrained_glove = get_GloveEmbed(word_index, glove_path, vocab_size, embed_dim)

# Get main target class weights based on the training set
pred_target_weights = np.genfromtxt(target_class_weights_path, delimiter=',')
pred_target_weights = torch.tensor(pred_target_weights).type(torch.float)

# Get adversary target class weights based on the training set
adv_target_weights = np.genfromtxt(adv_target_class_weights_path, delimiter=',')

# Get the train and validation data
train_set = get_data(train_path, word_index, seq_length, debias, batch_size,
                     num_workers, shuffle=True, pin_memory=pin_memory)
val_set = get_data(val_path, word_index, seq_length, False, batch_size,
                   num_workers, shuffle=True, pin_memory=pin_memory)

# Train loop
for iter in range(len(grid)):
    print(f"=> Start Training of Model {iter}")
    writer = SummaryWriter(f'debias_inf_board/training/models/debias {iter}')

    # Initialize networks
    me = ME(vocab_size, embed_dim, pretrained_glove, train_embed, hme_hidden,
            dropout, device)
    ce = CE(ce_input, hce_hidden)
    predictor = Predictor((hce_hidden + hme_hidden), pred_hidden, pred_classes)
    adversary = Adversary(hme_hidden, adv_hidden, adv_classes)
import operator
import sys  # needed below for sys.argv
import gensim, sklearn
from collections import defaultdict
from batch_gen import batch_gen
from my_tokenizer import glove_tokenize
import xgboost as xgb
import ast
import h5py
import pickle


### Preparing the text data
texts = []          # list of text samples
labels_index = {}   # dictionary mapping label name to numeric id
labels = []         # list of label ids

label_map = {'none': 0, 'racism': 1, 'sexism': 2}
tweet_data = get_data()
for tweet in tweet_data:
    texts.append(tweet['text'].lower())
    labels.append(label_map[tweet['label']])
print('Found %s texts. (samples)' % len(texts))

# logistic, gradient_boosting, random_forest, svm, tfidf_svm_linear, tfidf_svm_rbf
model_count = 2

word_embed_size = 200
GLOVE_MODEL_FILE = "glove_embeddings/glove.twitter.27B.200d.txt"
EMBEDDING_DIM = 200
MODEL_TYPE = sys.argv[1]
print 'Embedding Dimension: %d' % (EMBEDDING_DIM)
print 'GloVe Embedding: %s' % (GLOVE_MODEL_FILE)

# Load model
task += '-' + init
model_dir = models_dir + task
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
else:
    print 'already exists. exiting...'
    exit(-1)

logger = get_logger(task, model_dir)
logger.info(arguments)
logger.info(task)

home = expanduser("~")
configure(tensorboard_dir + task)

x_train, x_test = get_data(task_str, input_dir)
np.random.shuffle(x_train)
out_size = 2

with open(input_vocab, 'r') as f:
    vocab = f.readlines()
vocab = map(lambda s: s.strip(), vocab)
vocab_size = len(vocab)

adv_net = AdvNN(hid_size, hid_size, out_size, hid_size, adv_hid_size, out_size,
# for "yellow" and "blue" extract the tokens # put them into a tensor # get the embeddings of those 2 words # use np.inner to get the semantic simmilarity # what about blue and car? # can you come up with a different way of computing the semantic simmilarity? import torch import numpy as np import data_handler as dh model = torch.load('transformer_model1.pth', map_location=torch.device('cpu')) _, _, _, vocab = dh.get_data() # index (dtype = int) of the word (token) return from vocab yellow = vocab['yellow'] blue = vocab['blue'] car = vocab['car'] print('Index of word yellow is: ', yellow) print('Index of word blue is: ', blue) print('Index of word car is: ', car) # convert the index into tensor yellow = torch.tensor(yellow) blue = torch.tensor(blue) car = torch.tensor(car)
def train(dataset, ckpt=None, output=None):
    """
    Train the model
    **input: **
        *dataset: (String) Dataset folder to use
        *ckpt: (String) [Optional] Path to the ckpt file to restore
        *output: (String) [Optional] Path to the output folder to use. ./outputs/ by default
    """

    def preprocessing_function(img):
        """
        Custom preprocessing_function: random brightness and contrast jitter
        """
        img = img * 255
        img = Image.fromarray(img.astype('uint8'), 'RGB')
        img = ImageEnhance.Brightness(img).enhance(random.uniform(0.6, 1.5))
        img = ImageEnhance.Contrast(img).enhance(random.uniform(0.6, 1.5))
        return np.array(img) / 255

    X_train, y_train, X_valid, y_valid, X_test, y_test = get_data(dataset)
    X_train = X_train / 255
    X_valid = X_valid / 255
    X_test = X_test / 255

    train_datagen = ImageDataGenerator()
    train_datagen_augmented = ImageDataGenerator(
        rotation_range=20,
        shear_range=0.2,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        preprocessing_function=preprocessing_function)
    inference_datagen = ImageDataGenerator()
    train_datagen.fit(X_train)
    train_datagen_augmented.fit(X_train)
    inference_datagen.fit(X_valid)
    inference_datagen.fit(X_test)

    # Utils method to print the current progression
    def plot_progression(b, cost, acc, label):
        print("[%s] Batch ID = %s, loss = %s, acc = %s" % (label, b, cost, acc))

    # Init model
    model = ModelTrafficSign("TrafficSign", output_folder=output)
    if ckpt is None:
        model.init()
    else:
        model.load(ckpt)

    # Training pipeline
    b = 0
    valid_batch = inference_datagen.flow(X_valid, y_valid, batch_size=BATCH_SIZE)
    best_validation_loss = None
    augmented_factor = 0.99
    decrease_factor = 0.80
    train_batches = train_datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)
    augmented_train_batches = train_datagen_augmented.flow(
        X_train, y_train, batch_size=BATCH_SIZE)

    while True:
        # Pick an augmented batch with probability augmented_factor
        next_batch = next(
            augmented_train_batches
            if random.uniform(0, 1) < augmented_factor else train_batches)
        x_batch, y_batch = next_batch

        ### Training
        cost, acc = model.optimize(x_batch, y_batch)

        ### Validation
        x_batch, y_batch = next(valid_batch, None)
        # Retrieve the cost and acc on this validation batch and save them in tensorboard
        cost_val, acc_val = model.evaluate(x_batch, y_batch, tb_test_save=True)

        if b % 10 == 0:  # Plot the last results
            plot_progression(b, cost, acc, "Train")
            plot_progression(b, cost_val, acc_val, "Validation")
        if b % 1000 == 0:  # Test the model on the whole validation set
            print("Evaluate full validation dataset ...")
            loss, acc, _ = model.evaluate_dataset(X_valid, y_valid)
            print("Current loss: %s Best loss: %s" % (loss, best_validation_loss))
            plot_progression(b, loss, acc, "TOTAL Validation")
            if best_validation_loss is None or loss < best_validation_loss:
                best_validation_loss = loss
                model.save()
            # Gradually use fewer augmented batches as training progresses
            augmented_factor = augmented_factor * decrease_factor
            print("Augmented Factor = %s" % augmented_factor)

        b += 1
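# Hypothetical entry point; the dataset folder is a placeholder. train()
# loops until interrupted, checkpointing whenever the full-validation loss improves.
if __name__ == '__main__':
    train("./dataset", ckpt=None, output="./outputs")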