def __init__(self, optimizer_id):
    self.map = 'sub-maxprob-thr50-2mm'  # The brain atlas we use for normalization (from HarvardOxford)
    self.CV = 4  # The number of random subsets to test our data on
    self.connectivity_metric = 'tangent'  # The type of functional connectivity extraction we use
    self.times_to_run = 3000  # Number of times that we randomly generate and test params before ending
    self.verbose = True  # Whether to print out model results after each iteration
    self.csv = True  # Whether to output results to a csv file or not
    self.estimator_chance = 0.5  # The chance that an estimator will be included
    self.shuffle_models = True  # Whether to shuffle the classifier order
    self.maxes = {  # Set maximum values for the random parameter generator
        'estimators': 2,  # Maximum estimators per classifier type
        'mlp_layers': 3,  # Maximum MLP layers
        'mlp_nodes': 150,  # Maximum nodes in each MLP layer
        'xgb_trees': 130,  # Maximum number of XGB trees
        'rf_trees': 130,  # Maximum number of RF trees
        'early_stopping': 3  # Maximum number of early-stopping iterations
    }
    self.models_consider = {  # Which models to consider during optimization
        'rf': True,  # Include Random Forests?
        'xgb': True,  # Include XGB?
        'mlp': True,  # Include MLP?
        'svc': True,  # Include SVC?
        'logit': True,  # Include Logit?
    }
    self.classifier_atr_choices = {  # Certain choice attributes for classifiers
        'mlp': ['sgd', 'lbfgs', 'adam'],  # MLP solver choices
        'svc': ['rbf', 'linear', 'poly']  # SVC kernel choices
    }
    self.csvFile = 'optimizer_' + optimizer_id + '_metrics' + '.csv'  # Set the CSV file name to include some useful information
    pickled_features, pickled_labels = check_and_get_pickled_data()  # Check and see if biomarkers are already created
    try:
        if not pickled_features or not pickled_labels:  # If we don't already have the data cached locally
            masker = get_atlas_data(self.map)  # Generate a mask using the HarvardOxford atlas
            adhd_data = generate_train_data()  # Retrieve the data from my hard drive
            masked_fmris = apply_masks(adhd_data.func, masker)
            # Calculate functional connectivity and combine phenotypic information as a feature. Returns a matrix
            # containing phenotypic information and computed functional connectivity -> features
            features, adhd_labels = make_connectivity_biomarkers(self.connectivity_metric, adhd_data.labels,
                                                                 adhd_data, masked_fmris)
        else:
            features, adhd_labels = pickled_features, pickled_labels  # If it is cached, retrieve it
    except ValueError:
        features, adhd_labels = pickled_features, pickled_labels  # If it is cached, retrieve it
    self.features = features
    self.labels = adhd_labels
    Helpers.write_attributes(optimizer_id, self.CV, self.times_to_run, self.estimator_chance, self.maxes,
                             self.classifier_atr_choices, self.models_consider)
def run_hdiutil_command(self, *args, **kwargs):
    args = ['hdiutil'] + list(args)
    try:
        out = Helpers.run_command(*args, **kwargs)
    except Exception as e:
        # Some commands require the disk image to be mounted/unmounted. If run when it isn't, hdiutil
        # fails with 'Resource temporarily unavailable', so toggle the mount state and retry once.
        if 'Resource temporarily unavailable' in str(e):
            if self.is_mounted():
                self.detach()
            else:
                self.attach()
            out = Helpers.run_command(*args, **kwargs)
        else:
            raise
    return out
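# Hedged sketch (not part of the original class): the same retry-on-busy pattern written against
# subprocess directly, so the flow above can be read in isolation. 'hdiutil info' is only an example
# invocation, and the toggle_mount callback stands in for the attach()/detach() calls; both are assumptions.
import subprocess


def run_with_mount_retry(args, toggle_mount):
    """Run an hdiutil command, toggling the mount state once if the disk is busy."""
    try:
        return subprocess.check_output(['hdiutil'] + list(args), stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        if b'Resource temporarily unavailable' in e.output:
            toggle_mount()  # e.g. attach or detach the image, as the class above does
            return subprocess.check_output(['hdiutil'] + list(args), stderr=subprocess.STDOUT)
        raise


# Example: run_with_mount_retry(['info'], toggle_mount=lambda: None)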
def make_connectivity_biomarkers(kind, labels, adhd200, pooled_subjects):
    """
    This function takes the masked fMRI volumes and the corresponding phenotypic information (age, gender
    and dexterity) and turns them into a 2D array for doing ML classification. If there is no phenotypic
    information available, we exclude that scan from the dataset.

    :param kind: (str) The type of functional connectivity we extract
    :param labels: (list) The truth values for the ADHD200 dataset
    :param adhd200: (ADHD200) The ADHD200 object
    :param pooled_subjects: (list) The masked fMRI volumes
    :return: (list) features, (list) labels
    """
    new_labels = []  # New list for labels, keeping only fMRI volumes that have corresponding phenotypic information
    temp_features = []  # New list for features, keeping only fMRI volumes that have corresponding phenotypic information
    conn_measure = ConnectivityMeasure(kind=kind, vectorize=True,
                                       discard_diagonal=True)  # Functional connectivity measure of the kind specified
    connectivity = conn_measure.fit_transform(pooled_subjects)  # Apply it to all of the masked fMRI scans
    bar = ProgressBar(max_value=len(adhd200.func))  # Instantiate a new progress bar
    ops = 0  # Set the default value of the bar to 0
    for index in range(len(adhd200.func)):
        # Retrieve the corresponding phenotypic information for each fMRI
        phenotypic_information = Helpers.get_params(adhd200, adhd200.func[index])
        ops += 1  # Increment the bar by one
        bar.update(ops)  # Update the progress bar to the value of the variable "ops"
        if phenotypic_information is not None:  # If we found phenotypic information for that fMRI
            new_labels.append(labels[index])  # Add it to the "approved" labels list
            # Stack the phenotypic information and the functional connectivity as a matrix. The phenotypic
            # information is padded with 0s to match the shape of the connectivity vector (conform_1d).
            generated_features = np.array(
                [Helpers.conform_1d(phenotypic_information, connectivity[index].shape[0]), connectivity[index]])
            temp_features.append(generated_features)  # Add it to the temp features
        else:
            continue  # Skip that fMRI scan
    d3_dataset = np.array(temp_features)  # Convert the 3D temp_features list to a numpy array
    nsamples, nx, ny = d3_dataset.shape  # Extract the dimensionality of the data
    d2_functional_connectivity = d3_dataset.reshape((nsamples, nx * ny))  # Convert it to 2 dimensions
    # Cache the features and labels so that we don't have to run this function again
    with open('pickles/features.pkl', 'wb') as features_file:
        dump(d2_functional_connectivity, features_file)
    with open('pickles/adhd_labels.pkl', 'wb') as labels_file:
        dump(new_labels, labels_file)
    return d2_functional_connectivity, new_labels
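# Hedged sketch of the cache-loading counterpart used in __init__ (check_and_get_pickled_data is called
# there but not shown in this section). The pickle paths mirror the ones written above; the signature and
# default paths are assumptions, not the project's confirmed implementation.
from os.path import exists
from pickle import load


def check_and_get_pickled_data(features_path='pickles/features.pkl', labels_path='pickles/adhd_labels.pkl'):
    """Return (features, labels) if both caches exist, otherwise (None, None)."""
    if not (exists(features_path) and exists(labels_path)):
        return None, None
    with open(features_path, 'rb') as features_file:
        features = load(features_file)
    with open(labels_path, 'rb') as labels_file:
        labels = load(labels_file)
    return features, labels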
def size(self, size):
    size = Helpers.get_bytes(size) if isinstance(size, str) else size
    # General validation: size must be numeric and the requested space must be available on disk
    if not Helpers.is_float(size):
        raise Exception('Invalid argument. Size must be an integer')
    elif size >= Helpers.bytes_available():
        raise Exception('Invalid argument. Size is too large, not enough space.')
    # Different handling depending on whether a size has been assigned before
    if self._size:
        self.run_hdiutil_command('resize', self.path, size=Helpers.hr_bytes(size))
    self._size = size
def diskutil_info(self):
    UTILITY_NAME = 'diskutil'
    self.attach()
    response = Helpers.run_command(UTILITY_NAME, 'info', self.get_mounting_point())
    non_empty_lines = [line for line in response.splitlines() if line != '']
    return {line.split(':')[0].lstrip(): line.split(':')[-1].strip() for line in non_empty_lines}
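# Hedged usage sketch: diskutil_info() returns a dict keyed by the labels `diskutil info` prints,
# e.g. 'Volume Name' or 'File System Personality'. The snippet below runs the same line-splitting
# on a canned string so it works without an attached image; the sample text is made up.
sample = "   Volume Name:               Untitled\n   File System Personality:  HFS+\n\n"
non_empty = [line for line in sample.splitlines() if line != '']
info = {line.split(':')[0].lstrip(): line.split(':')[-1].strip() for line in non_empty}
# info == {'Volume Name': 'Untitled', 'File System Personality': 'HFS+'}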
def mp3(self, url):
    path = Helpers.check_platform('Music')
    self.create_path(path)
    ydl_opts = utils.ydl_options()
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    print(colored('Download Completed', 'green'))
def mp4(self, url):
    path = Helpers.check_platform('Videos')
    self.create_path(path)
    try:
        with youtube_dl.YoutubeDL({}) as ydl:
            ydl.download([url])
        print(colored('Download Completed', 'green'))
    except Exception:
        print(colored('Something went wrong, try again', 'red'))
def playlist(self, url):
    playlist = pafy.get_playlist(url)
    path = Helpers.check_platform() + "/Playlists/{}".format(playlist['title'])
    join_ = os.path.join(path)
    file = Helpers.check_platform('Playlists')
    if not os.path.isdir(file):
        os.mkdir(file)
    if not os.path.isdir(path):
        os.mkdir(join_)
    os.chdir(join_)
    utils.playlist_info(url)
    with youtube_dl.YoutubeDL({}) as ydl:
        ydl.download([url])
def run(self):
    """
    Run the model self.times_to_run times using random parameters and export the results to a CSV file.
    """
    for times in range(0, self.times_to_run):  # Loop through the number of times we have to run
        output = {  # Initialize an empty dictionary containing the results from each iteration
            'accuracies': [],
            'f1s_positive': [],
            'precisions_positive': [],
            'recalls_positive': [],
            'f1s_negative': [],
            'precisions_negative': [],
            'recalls_negative': [],
            'true_negative': [],
            'false_positive': [],
            'false_negative': [],
            'true_positive': []
        }
        random_attributes = self._random_args()  # Generate random model parameters
        layer_order = self._find_order(random_attributes)
        for cv_run in range(self.CV):
            accuracy, positive_metrics, negative_metrics, confusion_metrics = run_model(
                self.features, self.labels, random_attributes, verbose=True
            )  # Run the model and get metrics from that run
            output['accuracies'].append(accuracy)  # Add this iteration's metrics to the CV arrays
            output['f1s_positive'].append(positive_metrics['f1'])
            output['f1s_negative'].append(negative_metrics['f1'])
            output['precisions_positive'].append(positive_metrics['precision'])
            output['precisions_negative'].append(negative_metrics['precision'])
            output['recalls_negative'].append(negative_metrics['recall'])
            output['recalls_positive'].append(positive_metrics['recall'])
            output['true_negative'].append(confusion_metrics['true_negative'])
            output['false_positive'].append(confusion_metrics['false_positive'])
            output['false_negative'].append(confusion_metrics['false_negative'])
            output['true_positive'].append(confusion_metrics['true_positive'])
            print 'Ran {0} times (iteration {1})'.format(times, cv_run), random_attributes
        data = Helpers.generate_csv_data(layer_order, output['accuracies'],
                                         [output['f1s_negative'], output['f1s_positive']],
                                         [output['precisions_negative'], output['precisions_positive']],
                                         [output['recalls_negative'], output['recalls_positive']],
                                         [output['true_negative'], output['false_positive'],
                                          output['false_negative'],
                                          output['true_positive']])  # Generate the dictionary for the CSV file
        self._csv_writer(random_attributes, data)  # Write the data to a file
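# Hedged illustration (not from the source): each list in `output` above collects one value per CV run.
# A mean per metric, as sketched below, is one plausible aggregation a helper like
# Helpers.generate_csv_data might perform before the row is written; the numbers are made up.
cv_output = {'accuracies': [0.61, 0.58, 0.64, 0.60], 'f1s_positive': [0.55, 0.52, 0.59, 0.56]}
summary = {metric: sum(values) / len(values) for metric, values in cv_output.items()}
# summary -> {'accuracies': 0.6075, 'f1s_positive': 0.555}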
def generate_data_model(self, image_info):
    ext = self.path.split('.')[-1]
    options = {
        'volname': 'Volume Name',
        'fs': 'File System Personality',
        'size': 'Total Size'
    }
    options = {k: image_info[v] for k, v in options.items()}
    str_size = ' '.join(options['size'].split(' ')[:2])
    options['size'] = Helpers.get_bytes(str_size)
    options['type'] = ext if ext != 'dmg' else 'UDIF'
    return options
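# Hedged example of the mapping above: image_info is assumed to look like parsed `diskutil info` /
# `hdiutil imageinfo` output with a human-readable size, and Helpers.get_bytes('100 MB') is assumed
# to convert the leading "<number> <unit>" pair; both the sample values and that behavior are assumptions.
image_info = {
    'Volume Name': 'Backup',
    'File System Personality': 'HFS+',
    'Total Size': '100 MB (104857600 Bytes)',
}
# For a path ending in '.dmg', generate_data_model would yield something shaped like:
# {'volname': 'Backup', 'fs': 'HFS+', 'size': 104857600, 'type': 'UDIF'}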
def test_jss_stage(self):
    driver = self.driver
    WebDriverWait(driver, 10).until_not(lambda x: x.find_element_by_xpath(
        '//*[@style="display: block;"]').is_displayed())
    helper = Helpers()
    helper.field(self, driver.find_element(By.ID, l.search_ref), time_stamp)
    driver.find_element_by_xpath(
        '//*[@id="downloadProjects-form"]//*[@data-target="list"]').click()
    WebDriverWait(driver, 10).until_not(lambda el: el.find_element(
        By.XPATH, '//*[@id="tasks-table"]/tbody/tr[2]').is_displayed())
    time.sleep(2)
    project_ref_num = driver.find_element_by_xpath(
        '//*[@id="tasks-table"]/tbody/tr/td[7]/a')
    project_ref_num.click()
    time.sleep(2)
    WebDriverWait(driver, 10).until(
        lambda x: x.find_element_by_id(l.project_code).is_displayed())
    assert driver.find_element(By.ID, l.project_code).is_displayed()
    auth = Authenticate()
    auth.delete_project(time_stamp)
def _csv_writer(self, iteration_input, iteration_output):
    """
    Write outputs to the csv file

    :param iteration_input: (dict) the parameters that were passed into the model
    :param iteration_output: (dict) the returned results from the CV runs
    :return: None
    """
    if self.csv:
        is_new_file = not exists(self.csvFile)  # Check if the file exists so we know whether to write the header
        with open(self.csvFile, 'a') as csv_file:  # Open the csv file for appending
            writer = DictWriter(csv_file, fieldnames=Helpers.fieldnames,
                                delimiter=',')  # Initialize a csv writer (using dictionaries)
            if is_new_file:
                writer.writeheader()  # If the file is new, create a header
            writer.writerow(
                Helpers.merge_two_dicts(iteration_input, iteration_output))  # Write the merged row
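# Hedged sketch of the merge step: Helpers.merge_two_dicts is assumed to behave like the usual
# copy-and-update idiom, so the written row contains both the sampled parameters and the CV results.
# This stand-in is illustrative only, not the project's confirmed helper.
def merge_two_dicts(a, b):
    merged = a.copy()   # start from the iteration inputs
    merged.update(b)    # add the iteration outputs; keys in b win on collision
    return merged


# e.g. merge_two_dicts({'svc_kernel': ['rbf']}, {'accuracies': [0.61, 0.60]})
# -> {'svc_kernel': ['rbf'], 'accuracies': [0.61, 0.60]}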
def download_type():
    music_type = input(colored('Press A for Audio, V for Video, P for Playlist : > ', 'green'))
    print(colored('Processing your download request ...........', 'blue'))
    if music_type == "A" or music_type == "a":
        Helpers.save_url_(url)
        Audio.mp3(url)
    elif music_type == 'V' or music_type == 'v':
        Helpers.save_url_(url)
        Audio.mp4(url)
    elif music_type == 'P' or music_type == 'p':
        Helpers.save_url(url)
        Audio.playlist(url)
    else:
        main()
def change_volname(old_name, new_name):
    Helpers.run_command('diskutil', 'rename', old_name, new_name)
def run_disk_util_command(self, *args, **kwargs):
    out = Helpers.run_command(*args, **kwargs)
    return out
def main(save_path, params):
    nhidden = params['nhidden']
    dropout = params['dropout']
    word2vec = params['word2vec']
    sub2vec = params['sub2vec']
    subdict = params['subdic']
    dataset = params['data']
    nlayers = params['nlayers']
    train_emb = params['train_emb']
    sub_dim = params['sub_dim']
    use_feat = params['use_feat']
    gating_fn = params['gating_fn']

    # save settings
    shutil.copyfile('config.py', '%s/config.py' % save_path)

    use_subs = sub_dim > 0
    dp = DataPreprocessor.DataPreprocessor()
    data = dp.preprocess(dataset, no_training_set=False, use_subs=use_subs, subdict=subdict)

    print "building minibatch loaders ...", datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    batch_loader_train = MiniBatchLoader.MiniBatchLoader(data.training, BATCH_SIZE, sample=1)
    batch_loader_val = MiniBatchLoader.MiniBatchLoader(data.validation, BATCH_SIZE)

    print "building network ...", datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    W_init, embed_dim = Helpers.load_word2vec_embeddings(data.dictionary[0], word2vec)
    S_init, sub_dim = Helpers.load_sub_embeddings(data.dictionary[1], sub2vec)
    m = model.Model(nlayers, data.vocab_size, data.num_chars, W_init, S_init, nhidden,
                    embed_dim, dropout, train_emb, sub_dim, use_feat, gating_fn)

    print "training ...", datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    num_iter = 0
    max_acc = 0.
    deltas = []

    logger = open(save_path + '/log', 'a', 0)
    if os.path.isfile('%s/best_model.p' % save_path):
        print 'loading previously saved model', datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        m.load_model('%s/best_model.p' % save_path)
        print "model loaded"
    else:
        print 'saving init model', datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        m.save_model('%s/model_init.p' % save_path)
        print 'loading init model', datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        m.load_model('%s/model_init.p' % save_path)

    for epoch in xrange(NUM_EPOCHS):
        print "epochs training ...", datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        estart = time.time()
        new_max = False

        for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames in batch_loader_train:
            loss, tr_acc, probs = m.train(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl)
            message = "Epoch %d TRAIN loss=%.4e acc=%.4f elapsed=%.1f" % (
                epoch, loss, tr_acc, time.time() - estart)
            print message
            logger.write(message + '\n')

            num_iter += 1
            if num_iter % VALIDATION_FREQ == 0:
                total_loss, total_acc, n, n_cand = 0., 0., 0, 0.
                for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames in batch_loader_val:
                    outs = m.validate(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl)
                    loss, acc, probs = outs[:3]

                    bsize = dw.shape[0]
                    total_loss += bsize * loss
                    total_acc += bsize * acc
                    n += bsize
                print('validate on ', str(n) + 'validation data')

                val_acc = total_acc / n
                if val_acc > max_acc:
                    max_acc = val_acc
                    m.save_model('%s/best_model.p' % save_path)
                    new_max = True
                message = "Epoch %d VAL loss=%.4e acc=%.4f max_acc=%.4f" % (
                    epoch, total_loss / n, val_acc, max_acc)
                print message
                logger.write(message + '\n')

        m.save_model('%s/model_%d.p' % (save_path, epoch))
        message = "After Epoch %d: Train acc=%.4f, Val acc=%.4f" % (epoch, tr_acc, val_acc)
        print message
        logger.write(message + '\n')

        # learning schedule
        if epoch >= 2:
            m.anneal()
        # stopping criterion
        if not new_max:
            break

    logger.close()
def _random_args(self, consider_mlp=True, consider_svc=True, consider_logit=True, consider_xgb=True,
                 consider_rf=True, consider_early_stopping=True):
    """
    Generate random parameters for testing

    :param consider_mlp: (bool) whether or not to include multi layer perceptron in the optimization
    :param consider_svc: (bool) whether or not to include SVC in the optimization
    :param consider_logit: (bool) whether or not to include Logistic Regression in the optimization
    :param consider_xgb: (bool) whether or not to include gradient boosting in the optimization
    :param consider_rf: (bool) whether or not to include random forests in the optimization
    :param consider_early_stopping: (bool) whether or not to randomize the "down" iterations required to stop a
                                    model train
    :return: (dict) a dictionary containing the model parameters (MLP solvers, MLP layers, XGB estimators,
             number of logistic regressions and SVC kernels)
    """
    mlp = Helpers.decision(probability=self.estimator_chance)  # Decide whether to include MLP
    svc = Helpers.decision(probability=self.estimator_chance)  # Decide whether to include SVC
    xgb = Helpers.decision(probability=self.estimator_chance)  # Decide whether to include XGB
    rf = Helpers.decision(probability=self.estimator_chance)  # Decide whether to include RF
    logit = choice([0, 1, 2, 3])  # Decide whether to include one group of logits, two groups, or none
    active_classifiers = []  # Keep a count so we know how many active estimators we have

    if mlp and self.models_consider['mlp']:
        num_mlp = randint(0, self.maxes['estimators'])  # How many MLP classifiers to use in this round of testing
        mlp_layer_schema = []
        mlp_solvers = []
        for _ in range(num_mlp):  # Loop through all of the MLP classifiers we chose to consider
            temp_schema = []
            mlp_solvers.append(choice(self.classifier_atr_choices['mlp']))  # Randomly choose an MLP solver
            number_of_layers = randint(1, self.maxes['mlp_layers'])  # Randomly generate the number of layers
            active_classifiers.append(
                Helpers.reversed_initial_structure['mlp'])  # Append so we keep track of active estimators
            for layer in range(number_of_layers):  # Loop through all of the layers
                nodes_in_layer = randint(1, self.maxes['mlp_nodes'])  # Random number of nodes in each layer (up to max)
                temp_schema.append(nodes_in_layer)  # Add it to the array containing the MLP layer schema
            mlp_layer_schema.append(temp_schema)
    else:
        mlp_layer_schema = None
        mlp_solvers = None

    if svc and self.models_consider['svc']:
        num_svc = randint(0, self.maxes['estimators'])  # Randomly generate the number of SVC classifiers to use
        svc_kernels = []
        for _ in range(num_svc):  # Loop through all of the SVC estimators we chose to consider
            svc_kernels.append(choice(self.classifier_atr_choices['svc']))  # Randomly choose a kernel for SVC
            active_classifiers.append(
                Helpers.reversed_initial_structure['svc'])  # Append so we keep track of active estimators
    else:
        svc_kernels = None

    if xgb and self.models_consider['xgb']:
        num_xgb = randint(0, self.maxes['estimators'])
        xgb_estimators = []
        for _ in range(num_xgb):
            xgb_estimators.append(randint(1, self.maxes['xgb_trees']))  # Random number of XGB trees (up to the max)
            active_classifiers.append(
                Helpers.reversed_initial_structure['xgb'])  # Append so we keep track of active estimators
    else:
        xgb_estimators = None

    if self.models_consider['logit']:
        if logit == 1:
            # Generate one group of logistic regressions but leave the other blank
            number_of_logit_regressions = randint(1, self.maxes['estimators']), 0
            for _ in range(number_of_logit_regressions[0]):
                active_classifiers.append(
                    Helpers.reversed_initial_structure['logit1'])  # Append so we keep track of active estimators
        elif logit == 2:
            # Generate two groups of a random number of logistic regressions
            number_of_logit_regressions = randint(1, self.maxes['estimators']), randint(0, self.maxes['estimators'])
            for _ in range(number_of_logit_regressions[0]):
                active_classifiers.append(
                    Helpers.reversed_initial_structure['logit1'])  # Append so we keep track of active estimators
            for _ in range(number_of_logit_regressions[1]):
                active_classifiers.append(
                    Helpers.reversed_initial_structure['logit2'])  # Append so we keep track of active estimators
        elif logit == 3:
            # Generate only the second group of logistic regressions and leave the first blank
            number_of_logit_regressions = 0, randint(1, self.maxes['estimators'])
            for _ in range(number_of_logit_regressions[1]):
                active_classifiers.append(
                    Helpers.reversed_initial_structure['logit2'])  # Append so we keep track of active estimators
        else:
            number_of_logit_regressions = 0, 0
            active_classifiers.append(
                Helpers.reversed_initial_structure['logit3'])  # Add the constant logistic regression to the list

    if rf and consider_rf:
        number_of_rfs = randint(0, self.maxes['estimators'])  # Generate the number of RFs to consider
        rf_estims = []
        for _ in range(number_of_rfs):  # For each RF classifier
            active_classifiers.append(
                Helpers.reversed_initial_structure['rf'])  # Add it to the active classifiers list
            rf_estims.append(randint(1, self.maxes['rf_trees']))  # Add the random number of estimators
    else:
        rf_estims = None

    if consider_early_stopping:
        # Randomly select the number of "down" iterations required to stop the model training
        early_stopping_iterations = randint(1, self.maxes['early_stopping'])
    else:
        early_stopping_iterations = 2

    if self.shuffle_models:
        positions = sample(active_classifiers, len(active_classifiers))  # Shuffle the order of estimators in the config
    else:
        positions = active_classifiers

    final_parameters = {
        'mlp_layers': mlp_layer_schema,
        'mlp_solver': mlp_solvers,
        'svc_kernel': svc_kernels,
        'xgb_estimators': xgb_estimators,
        'logistic_regressions': number_of_logit_regressions,
        'rf_estimators': rf_estims,
        'early_stopping_iterations': early_stopping_iterations,
        'positions': positions,
    }
    print final_parameters
    return final_parameters  # Return the final dictionary as a result
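# Hedged illustration only: one possible return value of _random_args(), assuming
# Helpers.reversed_initial_structure maps 'mlp', 'svc', 'xgb', 'rf', 'logit1', ... to position labels.
# The exact values are random per call; this just shows the shape consumed by run_model in run().
example_parameters = {
    'mlp_layers': [[120, 35]],            # one MLP with two hidden layers
    'mlp_solver': ['adam'],
    'svc_kernel': ['rbf', 'linear'],      # two SVCs
    'xgb_estimators': None,               # XGB not drawn this round
    'logistic_regressions': (1, 0),
    'rf_estimators': [87],
    'early_stopping_iterations': 2,
    'positions': ['svc', 'mlp', 'rf', 'svc', 'logit1'],
}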
def main(load_path, params, mode='test'):
    regularizer = params['regularizer']
    rlambda = params['lambda']
    nhidden = params['nhidden']
    dropout = params['dropout']
    word2vec = params['word2vec']
    dataset = params['dataset']
    nlayers = params['nlayers']
    train_emb = params['train_emb']
    subsample = params['subsample']
    base_model = params['model']
    char_dim = params['char_dim']
    use_feat = params['use_feat']
    gating_fn = params['gating_fn']

    # load settings
    shutil.copyfile('%s/config.py' % load_path, 'config.py')

    dp = DataPreprocessor.DataPreprocessor()
    data = dp.preprocess(dataset)
    inv_vocab = data.inv_dictionary

    print("building minibatch loaders ...")
    if mode == 'test':
        batch_loader_test = MiniBatchLoader.MiniBatchLoader(data.test, BATCH_SIZE, data.dictionary)
    else:
        batch_loader_test = MiniBatchLoader.MiniBatchLoader(data.validation, BATCH_SIZE, data.dictionary)

    print("building network ...")
    W_init, embed_dim = Helpers.load_word2vec_embeddings(data.dictionary[0], word2vec)
    m = eval(base_model).Model(nlayers, data.vocab_size, data.num_chars, W_init, regularizer, rlambda,
                               nhidden, embed_dim, dropout, train_emb, subsample, char_dim, use_feat,
                               data.dictionary[4])
    m.load_model('%s/best_model.p' % load_path)

    print("testing ...")
    pr = np.zeros((len(batch_loader_test.questions),
                   batch_loader_test.max_num_cand)).astype('float32')
    fids, attns = [], []

    total_loss, total_acc, n = 0., 0., 0
    for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames, match_feat, use_char, use_char_q in batch_loader_test:
        outs = m.validate(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl,
                          match_feat, use_char, use_char_q)
        loss, acc, probs = outs[:3]

        attns += [[fnames[0], probs[0, :]] + [o[0, :, :] for o in outs[3:]]]  # store one attention

        bsize = dw.shape[0]
        total_loss += bsize * loss
        total_acc += bsize * acc

        pr[n:n + bsize, :] = probs
        fids += fnames
        n += bsize

    logger = open(load_path + '/log', 'a', 0)
    message = '%s Loss %.4e acc=%.4f' % (mode.upper(), total_loss / n, total_acc / n)
    print message
    logger.write(message + '\n')
    logger.close()

    np.save('%s/%s.probs' % (load_path, mode), np.asarray(pr))
    pkl.dump(attns, open('%s/%s.attns' % (load_path, mode), 'w'))

    f = open('%s/%s.ids' % (load_path, mode), 'w')
    for item in fids:
        f.write(item + '\n')
    f.close()
def main(save_path, params):
    regularizer = params['regularizer']
    rlambda = params['lambda']
    nhidden = params['nhidden']
    dropout = params['dropout']
    word2vec = params['word2vec']
    dataset = params['dataset']
    nlayers = params['nlayers']
    train_emb = params['train_emb']
    subsample = params['subsample']
    base_model = params['model']
    char_dim = params['char_dim']
    use_feat = params['use_feat']
    train_cut = params['train_cut']
    gating_fn = params['gating_fn']

    # save settings
    shutil.copyfile('config.py', '%s/config.py' % save_path)

    use_chars = char_dim > 0
    dp = DataPreprocessor.DataPreprocessor()
    data = dp.preprocess(dataset, use_chars=use_chars)

    print("building minibatch loaders ...")
    batch_loader_train = MiniBatchLoader.MiniBatchLoader(data.training, BATCH_SIZE, data.dictionary,
                                                         sample=train_cut, max_qry_len=85)
    batch_loader_val = MiniBatchLoader.MiniBatchLoader(data.validation, BATCH_SIZE, data.dictionary,
                                                       max_qry_len=85)
    batch_loader_test = MiniBatchLoader.MiniBatchLoader(data.test, BATCH_SIZE, data.dictionary)

    print("building network ...")
    W_init, embed_dim, = Helpers.load_word2vec_embeddings(data.dictionary[0], word2vec)
    m = eval(base_model).Model(nlayers, data.vocab_size, data.num_chars, W_init, regularizer, rlambda,
                               nhidden, embed_dim, dropout, train_emb, subsample, char_dim, use_feat,
                               data.dictionary[4])

    print("training ...")
    num_iter = 0
    max_acc = 0.
    deltas = []
    test_acc = 0.

    logger = open(save_path + '/log', 'a', 0)

    # if os.path.isfile('%s/best_model.p'%save_path):
    #     print('loading previously saved model')
    #     m.load_model('%s/best_model.p'%save_path)
    # else:
    #     print('saving init model')
    #     m.save_model('%s/model_init.p'%save_path)
    #     print('loading init model')
    #     m.load_model('%s/model_init.p'%save_path)

    for epoch in xrange(NUM_EPOCHS):
        estart = time.time()
        new_max = False

        for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames, match_feat, use_char, use_char_q in batch_loader_train:
            loss, tr_acc, probs = m.train(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl,
                                          match_feat, use_char, use_char_q)

            # message = "Epoch %d TRAIN loss=%.4e acc=%.4f elapsed=%.1f" % (
            #     epoch, loss, tr_acc, time.time()-estart)
            # print message
            # logger.write(message+'\n')

            if num_iter % VALIDATION_FREQ == 0:
                total_loss, total_acc, n, n_cand = 0., 0., 0, 0.

                for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames, match_feat, use_char, use_char_q in batch_loader_val:
                    outs = m.validate(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl,
                                      match_feat, use_char, use_char_q)
                    loss, acc, probs = outs[:3]

                    bsize = dw.shape[0]
                    total_loss += bsize * loss
                    total_acc += bsize * acc
                    n += bsize

                val_acc = total_acc / n
                if val_acc > max_acc:
                    max_acc = val_acc
                    m.save_model('%s/best_model.p' % save_path)

                    temp_acc, temp_n = 0.0, 0
                    for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames, match_feat, use_char, use_char_q in batch_loader_test:
                        outs = m.validate(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl,
                                          match_feat, use_char, use_char_q)
                        _, acc, _ = outs[:3]
                        bsize = dw.shape[0]
                        temp_acc += bsize * acc
                        temp_n += bsize
                    test_acc = temp_acc / temp_n

                    new_max = True
                message = "Epoch %d VAL loss=%.4e acc=%.4f max_acc=%.4f test=%.4f" % (
                    epoch, total_loss / n, val_acc, max_acc, test_acc)
                print message
                logger.write(message + '\n')

            num_iter += 1

        m.save_model('%s/model_%d.p' % (save_path, epoch))
        message = "After Epoch %d: Train acc=%.4f, Val acc=%.4f" % (epoch, tr_acc, val_acc)
        print message
        logger.write(message + '\n')

        # learning schedule
        if epoch >= 2:
            m.anneal()
        # stopping criterion
        if not new_max:
            break

    logger.close()
import argparse
import os
import sys
from pathlib import Path

import pdfplumber
from tld import is_tld
from tld.utils import update_tld_names

from utils import Helpers, Termcolors

__author__ = "DFIRSec (@pulsecode)"
__version__ = "v0.0.8"
__description__ = "Extract Indicators of Compromise (IOCs) from PDF documents."

helper = Helpers()
tc = Termcolors()

# update/sync tld names
update_tld_names()

# Base directory
parent = Path(__file__).resolve().parent


def extractor(pdf):
    size = os.path.getsize(pdf)
    large = round(size / (1024 * 1024))
    if size > 10240000:
        sys.exit(
            f"{tc.RED}[ERROR]{tc.RESET} Limit file size to 10 MB or less. Your file is {large:,} MB."
def main(load_path, params, mode='test'):
    nhidden = params['nhidden']
    dropout = params['dropout']
    word2vec = params['word2vec']
    dataset = params['dataset']
    nlayers = params['nlayers']
    train_emb = params['train_emb']
    char_dim = params['char_dim']
    use_feat = params['use_feat']
    gating_fn = params['gating_fn']
    ent_setup = params['ent_setup']
    data_path = params['data_path']

    # save settings
    shutil.copyfile('config.py', '%s/config_test.py' % load_path)

    use_chars = char_dim > 0
    if dataset == "clicr":
        dp = DataPreprocessor.DataPreprocessorClicr()
        #dataset_path = "/mnt/b5320167-5dbd-4498-bf34-173ac5338c8d/Datasets/bmj_case_reports_data/dataset_json_concept_annotated/"
        #dataset_path = "data/"
        data = dp.preprocess(data_path, ent_setup=ent_setup, no_training_set=True)
    elif dataset == "clicr_novice":
        dp = DataPreprocessor.DataPreprocessorNovice()
        data = dp.preprocess(data_path, ent_setup=ent_setup, no_training_set=True)
    else:
        dp = DataPreprocessor.DataPreprocessor()
        data = dp.preprocess(data_path, no_training_set=True)
    inv_vocab = data.inv_dictionary

    assert os.path.exists(params["test_file"] if mode == "test" else params["validation_file"])

    print("building minibatch loaders ...")
    if mode == 'test':
        batch_loader_test = MiniBatchLoader.MiniBatchLoader(data.test, BATCH_SIZE)
    else:
        batch_loader_test = MiniBatchLoader.MiniBatchLoader(data.validation, BATCH_SIZE)
    f_to_cand = {i[-1]: i[3] for i in batch_loader_test.questions}

    print("building network ...")
    W_init, embed_dim = Helpers.load_word2vec_embeddings(data.dictionary[0], word2vec)
    m = GAReader.Model(nlayers, data.vocab_size, data.num_chars, W_init, nhidden, embed_dim,
                       dropout, train_emb, char_dim, use_feat, gating_fn, save_attn=False)
    print("model load path")
    print('%s/best_model.p' % load_path)
    m.load_model('%s/best_model.p' % load_path)

    print("testing ...")
    pr = np.zeros((len(batch_loader_test.questions),
                   batch_loader_test.max_num_cand)).astype('float32')
    fids, attns = [], []
    pred_ans = {}

    total_loss, total_acc, n = 0., 0., 0
    for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames in batch_loader_test:
        outs = m.validate(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl)
        loss, acc, probs = outs[:3]

        attns += [[fnames[0], probs[0, :]] + [o[0, :, :] for o in outs[3:]]]  # store one attention

        for f in range(len(fnames)):
            pred_cand = probs[f].argmax()
            pred_a_ids = f_to_cand[fnames[f]][pred_cand]
            pred_a = " ".join([inv_vocab[i] for i in pred_a_ids])
            if ent_setup == "ent-anonym" and (dataset == "clicr" or dataset == "clicr_novice"):
                relabeling_dicts = data.test_relabeling_dicts if mode == 'test' else data.val_relabeling_dicts
                pred_a = relabeling_dicts[fnames[f]][pred_a]
            pred_ans[fnames[f]] = pred_a

        bsize = dw.shape[0]
        total_loss += bsize * loss
        total_acc += bsize * acc

        pr[n:n + bsize, :] = probs
        fids += fnames
        n += bsize

    if (params["dataset"] == "clicr" or params["dataset"] == "clicr_plain" or params["dataset"] == "clicr_novice") \
            and (mode == 'test' or mode == 'validation'):
        print("writing predictions")
        preds_data = utils.to_output_preds(pred_ans)
        preds_filepath = load_path + '/{}.preds'.format(mode)
        utils.write_preds(preds_data, file_name=preds_filepath)
        utils.external_eval(preds_filepath, preds_filepath + ".scores",
                            params["test_file"] if mode == "test" else params["validation_file"],
                            extended=True)

    logger = open(load_path + '/log.test', 'a')
    message = '%s Loss %.4e acc=%.4f' % (mode.upper(), total_loss / n, total_acc / n)
    print(message)
    logger.write(message + '\n')
    logger.close()

    np.save('%s/%s.probs' % (load_path, mode), np.asarray(pr))
    pickle.dump(attns, open('%s/%s.attns' % (load_path, mode), 'wb'))

    f = open('%s/%s.ids' % (load_path, mode), 'w')
    for item in fids:
        f.write(item + '\n')
    f.close()
def main(save_path, params):
    nhidden = params['nhidden']
    dropout = params['dropout']
    word2vec = params['word2vec']
    dataset = params['dataset']
    nlayers = params['nlayers']
    train_emb = params['train_emb']
    char_dim = params['char_dim']
    use_feat = params['use_feat']
    gating_fn = params['gating_fn']
    out = 'out'

    # save settings
    shutil.copyfile('config.py', '%s/config.py' % save_path)

    use_chars = char_dim > 0
    dp = DataPreprocessor.DataPreprocessor()
    data = dp.preprocess(dataset, no_training_set=False, use_chars=use_chars)
    word_dictionary = data.dictionary[0]
    the_index = word_dictionary['the']
    #print('the index : {}'.format(word_dictionary['the']))
    idx_to_word = dict([(v, k) for (k, v) in word_dictionary.iteritems()])
    words = [idx_to_word[i] for i in sorted(idx_to_word.keys())]

    print("building minibatch loaders ...")
    batch_loader_train = MiniBatchLoader.MiniBatchLoader(data.training, BATCH_SIZE, sample=1.0)
    batch_loader_val = MiniBatchLoader.MiniBatchLoader(data.validation, BATCH_SIZE)

    print("building network ...")
    W_init, embed_dim, = Helpers.load_word2vec_embeddings(data.dictionary[0], word2vec)
    #print('the embedding : {}'.format(W_init[the_index]))
    #print(W_init[0:5])

    print("running GAReader ...")
    m = GAReader.Model(nlayers, data.vocab_size, data.num_chars, W_init, nhidden, embed_dim,
                       dropout, train_emb, char_dim, use_feat, gating_fn, words).build_network()
    m.compile(optimizer=tf.keras.optimizers.Adam(lr=LEARNING_RATE, clipnorm=GRAD_CLIP),
              loss=tf.keras.losses.categorical_crossentropy,
              metrics=[tf.keras.metrics.categorical_accuracy])
    #tf.enable_eager_execution(config=tf.ConfigProto(allow_soft_placement = True))

    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            K.set_session(sess)
            #with tf.device('/gpu:0:'):
            tensorboard = TensorBoardCustom(log_dir="logs", words=words)
            modelcheckpoint = tf.keras.callbacks.ModelCheckpoint(
                'output/weights.{epoch:02d}-{val_loss:.2f}.hdf5')
            writer = tf.summary.FileWriter("logs")

            def schedule(epoch, lr):
                if epoch >= 3:
                    return lr * 0.5
                else:
                    return lr

            lrate = LearningRateScheduler(schedule, verbose=1)
            for epoch in xrange(NUM_EPOCHS):
                for (inputs, a) in batch_loader_train:
                    [dw, qw, m_dw, m_qw, c, m_c, cl] = inputs
                    m = GAReader.Model(nlayers, data.vocab_size, data.num_chars, W_init, nhidden,
                                       embed_dim, dropout, train_emb, char_dim, use_feat,
                                       gating_fn, words).build_network()
                    m.compile(optimizer=tf.keras.optimizers.Adam(lr=LEARNING_RATE, clipnorm=GRAD_CLIP),
                              loss=tf.keras.losses.categorical_crossentropy,
                              metrics=[tf.keras.metrics.categorical_accuracy])
                    #print(dw.shape)
                    #print('dw : {}'.format(dw))
                    #print('qw : {}'.format(qw))
                    #print('m_dw : {}'.format(m_dw))
                    #print('m_qw : {}'.format(m_qw))
                    #print('c : {}'.format(c))
                    #print([idx_to_word[i] for i in dw[0, :, 0].tolist()])
                    train_summary = m.train_on_batch(
                        inputs, to_categorical(a, batch_loader_train.max_num_cand))
                    print(m.get_weights()[0])
                    print('epoch: {}, train loss: {}, train acc: {}'.format(
                        epoch, train_summary[0], train_summary[1]))
                lr = tf.summary.scalar('learning_rate', LEARNING_RATE)
                summary = tf.summary.merge_all()
                s = sess.run(summary)
                writer.add_summary(s)
            writer.close()
import youtube_dl
import os
import getpass

import pafy
from termcolor import colored

from utils import Helpers

user = getpass.getuser()
utils = Helpers()


class Music:

    @staticmethod
    def create_path(path):
        if not os.path.exists(path):
            os.mkdir(path)
            os.chdir(path)
        os.chdir(path)

    def mp3(self, url):
        path = Helpers.check_platform('Music')
        self.create_path(path)
        ydl_opts = utils.ydl_options()
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        print(colored('Download Completed', 'green'))

    def mp4(self, url):
        path = Helpers.check_platform('Videos')
        self.create_path(path)
        try:
def main(load_path, params, mode='test'):
    nhidden = params['nhidden']
    dropout = params['dropout']
    word2vec = params['word2vec']
    dataset = params['dataset']
    nlayers = params['nlayers']
    train_emb = params['train_emb']
    char_dim = params['char_dim']
    use_feat = params['use_feat']
    gating_fn = params['gating_fn']

    dp = DataPreprocessor.DataPreprocessor()
    data = dp.preprocess(dataset, no_training_set=True)
    inv_vocab = data.inv_dictionary

    print("building minibatch loaders ...")
    if mode == 'test':
        batch_loader_test = MiniBatchLoader.MiniBatchLoader(data.test, BATCH_SIZE)
    else:
        batch_loader_test = MiniBatchLoader.MiniBatchLoader(data.validation, BATCH_SIZE)

    print("building network ...")
    W_init, embed_dim = Helpers.load_word2vec_embeddings(data.dictionary[0], word2vec)
    m = Reader.Model(nlayers, data.vocab_size, data.num_chars, W_init, nhidden, embed_dim,
                     dropout, train_emb, char_dim, use_feat, gating_fn, save_attn=True)
    m.load_model('%s/best_model.p' % load_path)

    print("testing ...")
    pr = np.zeros((len(batch_loader_test.questions),
                   batch_loader_test.max_num_cand)).astype('float32')
    fids, attns = [], []

    total_loss, total_acc, n = 0., 0., 0
    result = {}
    for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames in batch_loader_test:
        outs = m.validate(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl)
        loss, acc, probs = outs[:3]

        attns += [[fnames[0], probs[0, :]] + [o[0, :, :] for o in outs[3:]]]  # store one attention

        bsize = dw.shape[0]
        total_loss += bsize * loss
        total_acc += bsize * acc

        pr[n:n + bsize, :] = probs
        fids += fnames
        n += bsize

        answer = probs.argmax(1)
        for it in range(len(fnames)):
            tid = fnames[it].split('/')[-1].split('.')[0].strip()
            result[eval(tid)] = answer[it]
            print tid, answer[it]
        print('probs----', probs)
        #print('a----', a)
        print('fnames----', fnames)

    print len(result)
    with open('raw.txt', 'w') as ff:
        for i in range(1, 2501):
            ff.write(str(result[i]) + '\n')

    logger = open(load_path + '/log', 'a', 0)
    message = '%s Loss %.4e acc=%.4f' % (mode.upper(), total_loss / n, total_acc / n)
    print message
    logger.write(message + '\n')
    logger.close()

    np.save('%s/%s.probs' % (load_path, mode), np.asarray(pr))
    pkl.dump(attns, open('%s/%s.attns' % (load_path, mode), 'w'))

    f = open('%s/%s.ids' % (load_path, mode), 'w')
    for item in fids:
        f.write(item + '\n')
    f.close()
def main(load_path, params, mode='test'):
    nhidden = params['nhidden']
    dropout = params['dropout']
    word2vec = params['word2vec']
    dataset = params['data']
    nlayers = params['nlayers']
    sub2vec = params['sub2vec']
    train_emb = params['train_emb']
    sub_dim = params['sub_dim']
    use_feat = params['use_feat']
    gating_fn = params['gating_fn']

    use_subs = sub_dim > 0
    dp = DataPreprocessor.DataPreprocessor()
    data = dp.preprocess(dataset, no_training_set=True, use_subs=use_subs)
    inv_vocab = data.inv_dictionary

    print("building minibatch loaders ...")
    if mode == 'test':
        batch_loader_test = MiniBatchLoader.MiniBatchLoader(data.test, BATCH_SIZE)
    else:
        batch_loader_test = MiniBatchLoader.MiniBatchLoader(data.validation, BATCH_SIZE)

    print("building network ...")
    W_init, embed_dim = Helpers.load_word2vec_embeddings(data.dictionary[0], word2vec)
    S_init, sub_dim = Helpers.load_sub_embeddings(data.dictionary[1], sub2vec)
    m = model.Model(nlayers, data.vocab_size, data.num_chars, W_init, S_init, nhidden, embed_dim,
                    dropout, train_emb, sub_dim, use_feat, gating_fn, save_attn=True)
    m.load_model('%s/best_model.p' % load_path)

    print("testing ...")
    pr = np.zeros((len(batch_loader_test.questions),
                   batch_loader_test.max_num_cand)).astype('float32')
    fids, attns = [], []

    total_loss, total_acc, n = 0., 0., 0
    for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames in batch_loader_test:
        outs = m.validate(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl)
        loss, acc, probs = outs[:3]

        attns += [[fnames[0], probs[0, :]] + [o[0, :, :] for o in outs[3:]]]  # store one attention

        bsize = dw.shape[0]
        total_loss += bsize * loss
        total_acc += bsize * acc
        fids += fnames
        n += bsize
        print("step" + str(n) + ",acc" + str(acc))

    logger = open(load_path + '/log', 'a', 0)
    message = '%s Loss %.4e acc=%.4f' % (mode.upper(), total_loss / n, total_acc / n)
    print message
    logger.write(message + '\n')
    logger.close()

    np.save('%s/%s.probs' % (load_path, mode), np.asarray(pr))
    pkl.dump(attns, open('%s/%s.attns' % (load_path, mode), 'w'))

    f = open('%s/%s.ids' % (load_path, mode), 'w')
    for item in fids:
        f.write(item + '\n')
    f.close()
def main(save_path, params, mode='train'):
    word2vec = params['word2vec']
    dataset = params['dataset']

    dp = DataPreprocessor.DataPreprocessor()
    data = dp.preprocess_rc(params, dataset)

    print("building minibatch loaders ...")
    batch_loader_train = MiniBatchLoader.MiniBatchLoaderMention(
        params, data.training, params['batch_size'])
    batch_loader_val = MiniBatchLoader.MiniBatchLoaderMention(
        params, data.validation, params['batch_size'], shuffle=False, ensure_answer=False)
    batch_loader_test = MiniBatchLoader.MiniBatchLoaderMention(
        params, data.test, params['batch_size'], shuffle=False, ensure_answer=False)

    print("building network ...")
    W_init, embed_dim, = Helpers.load_word2vec_embeddings(data.dictionary[0], word2vec)
    m = GA.Model(params, W_init, embed_dim)

    print("training ...")
    num_iter = 0
    max_acc = 0.0
    min_loss = 1e5

    logger = open(save_path + '/log', 'a', 0)
    train_writer = tf.summary.FileWriter(os.path.join(save_path, 'train'))
    val_writer = tf.summary.FileWriter(os.path.join(save_path, 'val'))

    if params['reload_']:
        print('loading previously saved model')
        saves = pkl.load(open('%s/checkpoints.p' % save_path))
        m.load_model('%s/best_model.p' % save_path, saves[-1])

    # train
    if mode == 'train':
        saves = []
        for epoch in xrange(params['num_epochs']):
            estart = time.time()
            stop_flag = False

            for example in batch_loader_train:
                loss, tr_acc, probs, summary = m.train(*example[:-2])

                if num_iter % params['logging_frequency'] == 0:
                    message = ("Epoch %d TRAIN loss=%.4e acc=%.4f elapsed=%.1f" %
                               (epoch, loss, tr_acc, time.time() - estart))
                    print(message)
                    logger.write(message + '\n')
                    train_writer.add_summary(summary, num_iter)

                num_iter += 1
                if num_iter % params['validation_frequency'] == 0:
                    total_loss, total_acc, n = 0., 0., 0.

                    for example in batch_loader_val:
                        outs = m.validate(*example[:-2])
                        loss, acc, probs = outs[:3]

                        bsize = example[0].shape[0]
                        total_loss += bsize * loss
                        total_acc += bsize * acc
                        n += bsize

                    val_acc = total_acc / n
                    print("11111111111 ", val_acc)
                    if val_acc > max_acc:
                        max_acc = val_acc
                        save_id = num_iter
                        print("111111111111111111111111111111")
                        sv = m.save_model('%s/best_model.p' % save_path, save_id)
                        saves.append(save_id)
                        new_max = True
                    val_loss = total_loss / n
                    message = "Epoch %d VAL loss=%.4e acc=%.4f max_acc=%.4f" % (
                        epoch, val_loss, val_acc, max_acc)
                    print(message)
                    logger.write(message + '\n')
                    _add_summary(val_writer, val_loss, "loss", num_iter)
                    _add_summary(val_writer, val_acc, "accuracy", num_iter)

                    # stopping
                    if val_loss < min_loss:
                        min_loss = val_loss
                    if params['stopping_criterion'] and (val_loss - min_loss) / min_loss > 0.3:
                        stop_flag = True
                        break

                if num_iter % params["anneal_frequency"] == 0:
                    m.anneal()

            #m.save_model('%s/model_%d.p'%(save_path,epoch))
            message = "After Epoch %d: Train acc=%.4f, Val acc=%.4f" % (epoch, tr_acc, max_acc)
            print(message)
            logger.write(message + '\n')

            if stop_flag:
                break

        # record all saved models
        pkl.dump(saves, open('%s/checkpoints.p' % save_path, 'w'))

    # test
    mode = 'test' if mode in ['train', 'test'] else 'val'
    print("testing ...")
    try:
        saves = pkl.load(open('%s/checkpoints.p' % save_path))
        print('%s/checkpoints.p' % save_path)
    except IOError:

        def _to_num(foo):
            try:
                num = int(foo)
            except ValueError:
                return None
            return num

        saves = []
        for directory in os.listdir(save_path):
            if not os.path.isdir(os.path.join(save_path, directory)):
                continue
            num = _to_num(directory)
            if num is None:
                continue
            saves.append(num)
        saves = sorted(saves)

    print("saves111111", saves)
    if not saves:
        print("No models saved during training!")
        return

    print('loading model')
    m.load_model('%s/best_model.p' % save_path, saves[-1])

    total_loss, total_acc, n = 0., 0., 0
    answer_structure = {}
    idict = data.inv_dictionary
    for example in batch_loader_val:
        outs = m.validate(*example[:-2])
        loss, acc, probs = outs[:3]
        pred_indices = np.argmax(probs, axis=1)
        for i in range(len(example[-1])):
            cname = str(example[-1][i]).strip()
            gt_answer = example[10][i]
            answer_structure[cname] = (pred_indices[i], gt_answer, probs[i, :])

        bsize = example[0].shape[0]
        total_loss += bsize * loss
        total_acc += bsize * acc
        n += bsize

    test_acc = total_acc / n
    test_loss = total_loss / n
    message = "TEST loss=%.4e acc=%.4f" % (test_loss, test_acc)
    print(message)
    logger.write(message + '\n')
    pkl.dump(answer_structure, open(os.path.join(save_path, "test_answer_structure.p"), "w"))

    logger.close()

    # clean up
    print("Cleaning up saved models ...")
def main(save_path, params):
    nhidden = params['nhidden']
    dropout = params['dropout']
    word2vec = params['word2vec']
    dataset = params['dataset']
    nlayers = params['nlayers']
    train_emb = params['train_emb']
    char_dim = params['char_dim']
    use_feat = params['use_feat']
    gating_fn = params['gating_fn']
    ent_setup = params['ent_setup']  # ent, ent-anonym, no-ent
    data_path = params['data_path']

    # save settings
    shutil.copyfile('config.py', '%s/config.py' % save_path)

    use_chars = char_dim > 0
    if dataset == "clicr":
        dp = DataPreprocessor.DataPreprocessorClicr()
        data = dp.preprocess(
            #"/mnt/b5320167-5dbd-4498-bf34-173ac5338c8d/Datasets/bmj_case_reports_data/dataset_json_concept_annotated/",
            data_path,
            ent_setup=ent_setup,
            no_training_set=False,
            use_chars=use_chars)
    elif dataset == "clicr_novice":
        dp = DataPreprocessor.DataPreprocessorNovice()
        data = dp.preprocess(data_path, ent_setup=ent_setup, no_training_set=False, use_chars=use_chars)
    else:
        dp = DataPreprocessor.DataPreprocessor()
        data = dp.preprocess(data_path, no_training_set=False, use_chars=use_chars)

    print("building minibatch loaders ...")
    batch_loader_train = MiniBatchLoader.MiniBatchLoader(data.training, BATCH_SIZE, sample=1.0)
    batch_loader_val = MiniBatchLoader.MiniBatchLoader(data.validation, BATCH_SIZE)

    print("building network ...")
    W_init, embed_dim, = Helpers.load_word2vec_embeddings(data.dictionary[0], word2vec)
    m = GAReader.Model(nlayers, data.vocab_size, data.num_chars, W_init, nhidden, embed_dim,
                       dropout, train_emb, char_dim, use_feat, gating_fn)

    print("training ...")
    num_iter = 0
    max_acc = 0.
    deltas = []

    logger = open(save_path + '/log', 'a')
    if os.path.isfile('%s/best_model.p' % save_path):
        print('loading previously saved model')
        m.load_model('%s/best_model.p' % save_path)
    else:
        print('saving init model')
        m.save_model('%s/model_init.p' % save_path)
        print('loading init model')
        m.load_model('%s/model_init.p' % save_path)

    for epoch in range(NUM_EPOCHS):
        estart = time.time()
        new_max = False

        for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames in batch_loader_train:
            loss, tr_acc, probs = m.train(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl)
            message = "Epoch %d TRAIN loss=%.4e acc=%.4f elapsed=%.1f" % (
                epoch, loss, tr_acc, time.time() - estart)
            print(message)
            logger.write(message + '\n')

            num_iter += 1
            if num_iter % VALIDATION_FREQ == 0:
                total_loss, total_acc, n, n_cand = 0., 0., 0, 0.

                for dw, dt, qw, qt, a, m_dw, m_qw, tt, tm, c, m_c, cl, fnames in batch_loader_val:
                    outs = m.validate(dw, dt, qw, qt, c, a, m_dw, m_qw, tt, tm, m_c, cl)
                    loss, acc, probs = outs[:3]

                    bsize = dw.shape[0]
                    total_loss += bsize * loss
                    total_acc += bsize * acc
                    n += bsize

                val_acc = total_acc / n
                if val_acc > max_acc:
                    max_acc = val_acc
                    m.save_model('%s/best_model.p' % save_path)
                    new_max = True
                message = "Epoch %d VAL loss=%.4e acc=%.4f max_acc=%.4f" % (
                    epoch, total_loss / n, val_acc, max_acc)
                print(message)
                logger.write(message + '\n')

        # m.save_model('%s/model_%d.p'%(save_path,epoch))
        message = "After Epoch %d: Train acc=%.4f, Val acc=%.4f" % (epoch, tr_acc, val_acc)
        print(message)
        logger.write(message + '\n')

        # learning schedule
        if epoch >= 2:
            m.anneal()
        # stopping criterion
        if not new_max:
            break

    logger.close()
def test_create_project(self):
    driver = self.driver
    WebDriverWait(driver, 10).until_not(lambda x: x.find_element_by_xpath(
        '//*[@style="display: block;"]').is_displayed())

    # project menu: get and click
    self.project_menu = driver.find_element_by_id(l.projectTool)
    self.assertTrue(self.project_menu.is_displayed() and self.project_menu.is_enabled())
    self.project_menu.click()

    # create project click
    WebDriverWait(driver, 10).until(lambda el: el.find_element_by_id(l.CREATE_PROJECT))
    driver.find_element(By.ID, l.CREATE_PROJECT).click()

    # fill mandatory fields
    helper = Helpers()
    helper.field(self, driver.find_element(By.ID, l.project_code), time_stamp)
    helper.field(self, driver.find_element(By.ID, l.project_note), time_stamp)
    helper.field(self, driver.find_element(By.ID, l.project_address), time_stamp)
    helper.field(self, driver.find_element(By.ID, l.project_client), time_stamp)
    helper.field(self, driver.find_element(By.ID, l.project_contact), time_stamp)
    print(time_stamp)
    driver.find_element_by_id(l.project_add_button).click()