def main():
    """Run every pipeline stage in order and exit with an aggregate status.

    Each stage is attempted even when an earlier one failed.  A failing stage
    logs its exception and overwrites the exit status (1 for the first four
    stages, 5 for the purge stage), so the last failing stage decides the
    code handed to ``sys.exit``; 0 means every stage succeeded.
    """
    # The lambdas defer the module/attribute lookups until we are inside the
    # ``try`` below, so a lookup failure is handled like any stage failure.
    stages = (
        (lambda: get_results.get_results(), 1),
        (lambda: find_latest_results.find_latest_results(), 1),
        (lambda: update_results.update_results(), 1),
        (lambda: upload_results.upload_results(), 1),
        (lambda: purge_results.purge_results(), 5),
    )

    status = 0
    for run_stage, failure_code in stages:
        try:
            run_stage()
        except Exception as err:
            logging.error(err)
            status = failure_code
    sys.exit(status)
def gen_outcomes():
    """
    Walk through every possible future and tally the winners of each group.

    Returns a dict keyed by group.  Each group maps a bracket name to a
    two-item list: the accumulated win share (1 / number of co-winners per
    outcome) and the value maintained by gen_histo() / gupdate().
    """
    survivors = get_survivors()
    twoexp = len(survivors) - 1
    entrants = readm()
    outcomes = {}
    current_results = get_results()
    for group in scorem(current_results, entrants):
        outcomes[group] = {}

    for indx in range(2 ** twoexp):
        picks = gen_future(indx, twoexp, survivors)
        projected = loc_update(copy.deepcopy(current_results), picks)
        standings = scorem(projected, entrants)
        for group in standings:
            # Take brackets from the front until a score drops below the
            # previous one; everything collected so far counts as a winner.
            winners = []
            last_score = 0
            for bracket in standings[group]:
                if bracket['score'] < last_score:
                    break
                winners.append(bracket['name'])
                last_score = bracket['score']

            # Progress indicator
            if indx % 100 == 0:
                print(indx)

            for name in winners:
                if name not in outcomes[group]:
                    outcomes[group][name] = [0, gen_histo(twoexp)]
                tally = outcomes[group][name]
                tally[0] += 1 / len(winners)
                tally[1] = gupdate(tally[1], indx)
    return outcomes
def download_day_source(source_name, date, source_day_total, authToken):
    """Download and decode every document of one source for one day.

    Requests each page produced by ``construct_page_list``, parses the
    response text with BeautifulSoup and decodes the text of every
    ``ns1:document`` element with ``extract_from_b64``.

    Args:
        source_name: source identifier, passed through to ``get_results``.
        date: day to download, passed through to ``get_results``.
        source_day_total: document total used to build the page list.
        authToken: authentication token, passed through to ``get_results``.

    Returns:
        dict with two lists: ``"stories"`` (decoded documents) and ``"junk"``
        (element text that ``extract_from_b64`` could not decode).
    """
    # TODO: if source_day_total is 0, just pass?
    page_texts = []
    for page in construct_page_list(source_day_total):
        response = get_results(source_name, date, page[0], page[1], authToken)
        page_texts.append(response.text)

    output_list = []
    junk_list = []
    for page_text in page_texts:
        soup = BeautifulSoup(page_text)
        for document in soup.findAll("ns1:document"):
            encoded = document.text
            try:
                output_list.append(extract_from_b64(encoded))
            except Exception:
                # Best effort: keep the undecodable payload for inspection.
                # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit
                # propagate instead of being counted as junk.
                junk_list.append(encoded)

    if junk_list:
        # Single-argument call form prints the same on Python 2 and 3.
        print("There were problems getting text from the base 64 in download_day_source. {0}".format(
            len(junk_list)))

    return {"stories": output_list, "junk": junk_list}
def get_source_day_total(source_name, date, authToken):
    """Return how many documents a source reports for a given day.

    Requests the first result page and reads the count from the
    ``ns3:documentsFound`` element of the response text.

    Args:
        source_name: source identifier, passed through to ``get_results``.
        date: day to query, passed through to ``get_results``.
        authToken: authentication token, passed through to ``get_results``.

    Returns:
        The count as an int.  Every failure (HTTP 500, count element missing,
        count not convertible, any exception while requesting) is reported on
        stdout and mapped to 0.
    """
    try:
        t = get_results(source_name, date, 1, 10, authToken)
        if t.status_code == 500:
            print("Error 500 from server on getting source-day total for {0} on {1}: {2}".format(
                source_name, date, t.text))
            return 0

        found = re.search(r'">(\d+?)</ns3:documentsFound', t.text)
        if found is None:
            print("In get_source_day_total, couldn't find total documents: {0}".format(
                t.text))
            return 0

        try:
            return int(found.group(1))
        except (TypeError, ValueError) as e:
            # int() signals an unparseable string with ValueError, which the
            # original ``except TypeError`` could never catch.
            print("Error for {0}, {1}: {2}".format(source_name, date, e))
            return 0
    except Exception as e:
        print("Problem getting total for {0}, {1}: {2}".format(
            source_name, date, e))
        return 0
def download_day_source(source_name, date, source_day_total, authToken):
    """Download and decode every document of one source for one day.

    Requests each page produced by ``construct_page_list``, parses the
    response text with BeautifulSoup and decodes the text of every
    ``ns1:document`` element with ``extract_from_b64``.

    Args:
        source_name: source identifier, passed through to ``get_results``.
        date: day to download, passed through to ``get_results``.
        source_day_total: document total used to build the page list.
        authToken: authentication token, passed through to ``get_results``.

    Returns:
        dict with two lists: ``"stories"`` (decoded documents) and ``"junk"``
        (element text that ``extract_from_b64`` could not decode).
    """
    # TODO: if source_day_total is 0, just pass?
    page_texts = []
    for page in construct_page_list(source_day_total):
        response = get_results(source_name, date, page[0], page[1], authToken)
        page_texts.append(response.text)

    output_list = []
    junk_list = []
    for page_text in page_texts:
        soup = BeautifulSoup(page_text)
        for document in soup.findAll("ns1:document"):
            encoded = document.text
            try:
                output_list.append(extract_from_b64(encoded))
            except Exception:
                # Best effort: keep the undecodable payload for inspection.
                # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit
                # propagate instead of being counted as junk.
                junk_list.append(encoded)

    if junk_list:
        # Single-argument call form prints the same on Python 2 and 3.
        print("There were problems getting text from the base 64 in download_day_source. {0}".format(len(junk_list)))

    return {"stories" : output_list, "junk" : junk_list}
def main(args):
    """Generate a solution for the given input and render it with its results.

    args[0] is the input file handed to read_input; args[1] is parsed as the
    integer k.  Two images are written: 'random_player_solution.png' and
    'random_player_results.png'.
    """
    data = read_input(args[0])
    k = int(args[1])
    solution = generate_solution(data, k)
    rgb.write_partitions(solution, 'random_player_solution.png')
    rgb.write_results(gr.get_results(solution, data, k),
                      'random_player_results.png')
def test_things():
    """
    Score the current results with scorem and print every group's brackets
    """
    scored = scorem(get_results(), readm())
    for group in scored:
        print(group)
        for entry in scored[group]:
            # one line per bracket: name, score, best possible score
            print(*(entry[field] for field in ('name', 'score', 'best_poss')))
# recover: poll previously saved jobs and re-arm itself as a periodic callback.
#
# What the body does, in order:
#   * fetches the saved jobs with self.download_state(); a falsy result is a no-op;
#   * for each job: calls job.update(), appends a timestamped
#     "Watching for results... (...)" line (tag "cool") to self.text_box
#     (temporarily switching it to NORMAL, then back to DISABLED), stores
#     job.isDone() in self.state and calls get_results(self, job) when it is true;
#   * silently ignores IndexError raised anywhere in the above;
#   * always reschedules itself through self.master.after(900000, self.recover).
# NOTE(review): the original indentation is lost in this view, so which of the
#   text_box/state statements sit inside the `for job in files` loop could not
#   be confirmed - verify against the real file.
# NOTE(review): presumably self.master is a Tk widget, which would make 900000
#   a delay in milliseconds (15 minutes) - confirm.
def recover(self): try: files = self.download_state() if not files: pass else: for job in files: job.update() self.text_box.config(state=NORMAL) self.text_box.insert( END, "Watching for results... (" + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + ')\n', "cool") self.text_box.see(END) self.text_box.config(state=DISABLED) self.text_box.update() self.state = job.isDone() if self.state: get_results(self, job) except IndexError: pass self.master.after(900000, self.recover)
def get_source_day_total(source_name, date, authToken):
    """Return how many documents a source reports for a given day.

    Requests the first result page and reads the count from the
    ``ns3:documentsFound`` element of the response text.

    Args:
        source_name: source identifier, passed through to ``get_results``.
        date: day to query, passed through to ``get_results``.
        authToken: authentication token, passed through to ``get_results``.

    Returns:
        The count as an int.  Every failure (HTTP 500, count element missing,
        count not convertible, any exception while requesting) is reported on
        stdout and mapped to 0.
    """
    try:
        t = get_results(source_name, date, 1, 10, authToken)
        if t.status_code == 500:
            print("Error 500 from server on getting source-day total for {0} on {1}: {2}".format(source_name, date, t.text))
            return 0

        found = re.search(r'">(\d+?)</ns3:documentsFound', t.text)
        if found is None:
            print("In get_source_day_total, couldn't find total documents: {0}".format(t.text))
            return 0

        try:
            return int(found.group(1))
        except (TypeError, ValueError) as e:
            # int() signals an unparseable string with ValueError, which the
            # original ``except TypeError`` could never catch.
            print("Error for {0}, {1}: {2}".format(source_name, date, e))
            return 0
    except Exception as e:
        print("Problem getting total for {0}, {1}: {2}".format(source_name, date, e))
        return 0
# dockit_vina: end-to-end docking run - prepare inputs, dock every ligand
# against every target row of dockit_param.csv, then call get_results().
#
# Parameters:
#   verbose       - truthy: child processes inherit the console (stdout=None);
#                   falsy: their stdout is redirected to os.devnull.
#   reset         - truthy: call reset_(root_dir) and exit immediately.
#   minimization  - ligands are minimised with `obminimize` only when this is
#                   exactly True.
#
# Steps visible in the body, in order:
#   1. create an AppLogger writing to 'dockit.logs' next to this file;
#   2. create <root>/ligands/PDB and <root>/targets/PDB; sys.exit() when either
#      holds no '.pdb' file or when parse_pdb() rejects one of them;
#   3. chdir into the ligands folder, optionally minimise every ligand in place;
#   4. convert ligands and targets to PDBQT with prepare_ligand4.py and
#      prepare_receptor4.py (outputs under ligands/PDBQT and targets/PDBQT);
#   5. read dockit_param.csv via parse_param_csv(); raise ValueError when a
#      row's target has no PDBQT file;
#   6. for each row and each '.pdbqt' ligand: create the result folder,
#      optionally prepare flexible residues with prepare_flexreceptor4.py,
#      write a config through vina_config() and start the engine command in a
#      daemon thread;
#   7. join every non-main thread, then call get_results().
#
# NOTE(review): str.rstrip('.pdbqt') removes any trailing run of the characters
#   ". p d b q t", not the suffix - a name such as 'adapt.pdbqt' would be
#   over-trimmed.  Used for the target check, ligand_name and the output name.
# NOTE(review): the original indentation is lost in this view; if the
#   os.path.isfile(flex_resi) check runs after the except branch set
#   flex_resi = None it would raise instead of skipping - confirm nesting.
# NOTE(review): the error-path sys.exit() calls pass no status, so failures
#   exit with code 0 - confirm this is intended.
def dockit_vina(verbose, reset, minimization): path = os.path.dirname(os.path.realpath(__file__)) logger = AppLogger.get(__name__, os.path.join(path, 'dockit.logs'), stream_level=logging.INFO) app_dir = os.path.dirname(os.path.abspath(__file__)) root_dir, app = os.path.split(app_dir) if verbose: stdout = None logger.info('Verbose=True. Set to False to silence console messages') else: stdout = open(os.devnull, 'w') if reset: logger.info('Resetting to pre-run state') reset_(root_dir) sys.exit() logger.info('Creating ligands and targets directories') ligands_dir = os.path.join(root_dir, 'ligands', 'PDB') if not os.path.exists(ligands_dir): os.makedirs(ligands_dir) targets_dir = os.path.join(root_dir, 'targets', 'PDB') if not os.path.exists(targets_dir): os.makedirs(targets_dir) logger.info('Checking for files in targets and ligands folders') if len([ protein for protein in os.listdir(targets_dir) if protein.lower().endswith('.pdb') ]) == 0: logger.error( 'Failed to find PDB files in {0} directory'.format(targets_dir)) sys.exit() if len([ ligand for ligand in os.listdir(ligands_dir) if ligand.lower().endswith('.pdb') ]) == 0: logger.error( 'Failed to find PDB files in {0} directory'.format(ligands_dir)) sys.exit() logger.info('Checking input file formats') for protein in os.listdir(targets_dir): if protein.lower().endswith('.pdb') and not parse_pdb( os.path.join(targets_dir, protein)): logger.error('File {0} not in PDB format'.format(protein)) sys.exit() for ligand in os.listdir(ligands_dir): if ligand.lower().endswith('.pdb') and not parse_pdb( os.path.join(ligands_dir, ligand)): logger.error('File not in PDB format {0}'.format(ligand)) sys.exit() # NOTE: sometimes the prepare_ligand will fail if pwd is not ligand PDB folder os.chdir(ligands_dir) if minimization is True: logger.info('Minimizing ligands with obminimize') try: for ligand in os.listdir(ligands_dir): subprocess.Popen( 'obminimize -o pdb {0} > min_{0}'.format(ligand), shell=True, stdout=stdout).wait() 
os.remove(ligand) os.rename('min_' + ligand, ligand) except Exception as e: logger.error('Failed to minimize ligands') logger.debug('Creating ligand PDBQT dir') ligands_pdbqt_dir = os.path.join(root_dir, 'ligands', 'PDBQT') if not os.path.exists(ligands_pdbqt_dir): os.makedirs(ligands_pdbqt_dir) logger.info('Converting ligands to PDBQT') for ligand in os.listdir(ligands_dir): if not ligand.lower().endswith('.pdb'): continue subprocess.Popen('{0} -l {1} -o {2} -A hydrogens -U nphs -v'.format( os.path.join(app_dir, 'prepare_ligand4.py'), ligand, os.path.join( ligands_pdbqt_dir, ligand.replace('pdb', 'pdbqt').replace('.PDB', '.pdbqt'))), shell=True, stdout=stdout).wait() logger.debug('Creating target PDBQT dir') targets_pdbqt_dir = os.path.join(root_dir, 'targets', 'PDBQT') if not os.path.exists(targets_pdbqt_dir): os.makedirs(targets_pdbqt_dir) logger.info('Converting targets to PDBQT') os.chdir(targets_dir) for target in os.listdir(targets_dir): if not target.lower().endswith('.pdb'): continue subprocess.Popen( '{0} -r {1} -o {2} -A checkhydrogens -U nphs -v'.format( os.path.join(app_dir, 'prepare_receptor4.py'), target, os.path.join( targets_pdbqt_dir, target.replace('pdb', 'pdbqt').replace('.PDB', '.pdbqt'))), shell=True, stdout=stdout).wait() logger.debug('Creating results folder') results_dir = os.path.join(root_dir, 'results') if not os.path.exists(results_dir): os.makedirs(results_dir) logger.debug('Reading dockit_param.csv') param_dir = os.path.join(root_dir, 'dockit_param.csv') param_dict = parse_param_csv(param_dir) logger.debug('Checking if targets in param file exist') for index, row in param_dict.items(): if row['target'] not in [ i.rstrip('.pdbqt') for i in os.listdir(targets_pdbqt_dir) ]: raise ValueError('{} does not exist'.format(row['target'])) logger.info('Creating docking config files and initiating docking') for index, row in param_dict.items(): for ligand in os.listdir(ligands_pdbqt_dir): if not ligand.endswith('.pdbqt'): continue ligand_name = 
ligand.rstrip('.pdbqt') # creating a dir to store individual target results tar_result_dir = os.path.join(results_dir, row['target'], ligand_name) if not os.path.exists(tar_result_dir): os.makedirs(tar_result_dir) flex_resi = None if row['flex_resi'] not in ('0', '', 'None', ' '): logger.info('Creating flexible residue PDBQT') try: subprocess.Popen( '{0} -r {1} -s {2} -g {3} -x {4} -v'.format( os.path.join(app_dir, 'prepare_flexreceptor4.py'), os.path.join(targets_pdbqt_dir, row['target'] + '.pdbqt'), row['flex_resi'], os.path.join(targets_pdbqt_dir, row['target'] + '.pdbqt'), os.path.join( targets_pdbqt_dir, row['target'] + row['flex_resi'] + '.pdbqt')), shell=True, stdout=stdout).wait() flex_resi = os.path.join( targets_pdbqt_dir, row['target'] + row['flex_resi'] + '.pdbqt') except Exception as e: logger.warning( 'Failed to assign flexible residues. Skipping') flex_resi = None if not os.path.isfile(flex_resi): logger.warning( 'Failed to assign flexible residues. Skipping') flex_resi = None engine = os.path.split(row['engine'])[-1] config_dir = os.path.join( tar_result_dir, '{0}_config_{1}-{2}.txt'.format(engine, row['target'], ligand_name)) logger.debug('Creating config file') vina_config(config_dir, targets_pdbqt_dir, ligands_pdbqt_dir, tar_result_dir, ligand, row, engine, flex_resi=flex_resi) logger.info('Docking {0} to {1} using {2}'.format( ligand_name, row['target'], row['engine'])) vina_command = "{0} --config {1} > {2}.txt".format( row['engine'], config_dir, os.path.join( tar_result_dir, engine + "_out_" + row['target'] + '-' + ligand.rstrip('.pdbqt'))) def run_cmd(vina_command, shell, stdout): subprocess.Popen(vina_command, shell=shell, stdout=stdout).wait() # creating a thread t = threading.Thread(target=run_cmd, args=(vina_command, True, stdout)) t.daemon = True t.start() # waiting for threads to finish main_thread = threading.currentThread() for t in threading.enumerate(): if t is main_thread: continue t.join() logger.info('Parsing results and writting 
output as CSV') get_results()
import pandas as pd
from get_results import get_results
#from predict_next_turn import predict_next_turn
#from calc_stats import calc_stats

# Import current results
fileloc = 'D://Campeonato Brasileiro//resultados.txt'
db_CB = get_results(fileloc)
# ``head`` has to be called: printing the attribute only shows the bound
# method, not the first rows.
# NOTE(review): assumes get_results returns a pandas DataFrame - confirm.
print(db_CB.head())

# Planned next steps (not wired in yet):
#Generate Statistics
#calc_stats()
#Predict Next Turn
#predict_next_turn()
import os import sys sys.path.append('/u/sciteam/gupta1/improving_genes/tools') from get_results import get_results numreplicates=10 numgenes=50 dataset_dir='/u/sciteam/gupta1/scratch/astra2_dataset' models = ['model.200.10000000.0.0000001','model.200.2000000.0.0000001','model.200.500000.0.0000001'] parameters=['unimproved'] for i in range(1,numreplicates+1): for midx in range(0,len(models)): for p in parameters: result_filepath=dataset_dir+'/'+models[midx]+'/'+str(i).zfill(2)+'/result_'+p+'.txt' #if not os.path.isfile(result_filepath) : if True: print result_filepath," Not found" rgenedir=dataset_dir+'/'+models[midx]+'/'+str(i).zfill(2) reftreefilename='true_induced50.tree' ogenedir=dataset_dir+'/'+models[midx]+'/'+str(i).zfill(2) outputtreefilename='50t_50s_fasttree.tree' outputfilename='/result_'+p+'.txt' get_results(rgenedir, reftreefilename,ogenedir,outputtreefilename,numgenes,outputfilename)
from sacred.observers import FileStorageObserver

from get_results import get_results
from main_loop import ex

# Store every run of the experiment under 'cifar100_attention_runs'.
ex.observers.append(FileStorageObserver.create('cifar100_attention_runs'))

# Refresh the results before the first run.  A failure here is treated as
# "nothing has been run yet".  ``except Exception`` (rather than a bare
# except) keeps Ctrl-C and SystemExit from being mistaken for that case.
try:
    get_results()
except Exception:
    print("No experiments have been done yet!")

# Baseline run with the default configuration
ex.run()
get_results()

# Sweep over the embedding variants, refreshing the results around each run
for position_embedding_version in ["rows_then_columns"]:
    for segment_embedding_version in ["rows_columns"]:
        get_results()
        ex.run(config_updates={"position_embedding_version": position_embedding_version,
                               "segment_embedding_version": segment_embedding_version,
                               }
               )
        get_results()

# Disabled sweep over model size:
# for n_heads in [1, 2, 4, 7]:
#     for n_layers in [1, 2, 3, 4, 5]:
#         ex.run(config_updates={"n_layers": n_layers,
#                                "n_heads": n_heads,
#                                }
#                )
#         get_results()
def _list_pdb_files(directory):
    """Return the entries of *directory* whose name ends in '.pdb' (any case)."""
    return [
        name for name in os.listdir(directory)
        if name.lower().endswith('.pdb')
    ]


def dockit(verbose, reset, minimization):
    """Validate the docking inputs, optionally minimise ligands, then dock.

    Reads ``dockit_param.csv`` from the directory above this file's folder,
    makes sure ``ligands/PDB`` and ``targets/PDB`` exist and contain valid PDB
    files, optionally minimises the ligands, hands over to ``dockit_vina`` and
    finally calls ``get_results``.

    Args:
        verbose: when truthy, child processes inherit the console; otherwise
            their stdout is redirected to ``os.devnull``.
        reset: when truthy, call ``reset_(root_dir)`` and exit immediately.
        minimization: ligands are minimised with ``obminimize`` only when this
            is exactly ``True``.

    Raises:
        SystemExit: after a reset, when either input folder holds no ``.pdb``
            file, or when ``parse_pdb`` rejects a file.
    """
    app_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.dirname(app_dir)

    if verbose:
        stdout = None
        logging.info('Verbose=True. Set to False to silence console messages')
    else:
        # Handed on to dockit_vina(), so it is deliberately not closed here.
        stdout = open(os.devnull, 'w')

    if reset:
        logging.info('Resetting to pre-run state')
        reset_(root_dir)
        sys.exit()

    logging.debug('Reading dockit_param.csv')
    param_dir = os.path.join(root_dir, 'dockit_param.csv')
    param_dict = parse_param_csv(param_dir)

    logging.info('Creating ligands directory')
    ligands_dir = os.path.join(root_dir, 'ligands', 'PDB')
    if not os.path.exists(ligands_dir):
        os.makedirs(ligands_dir)

    logging.info('Creating targets directory')
    targets_dir = os.path.join(root_dir, 'targets', 'PDB')
    if not os.path.exists(targets_dir):
        os.makedirs(targets_dir)

    # NOTE(review): the sys.exit() calls below pass no status, so these error
    # paths exit with code 0 - confirm whether callers rely on that.
    logging.info('Checking for PDB files in targets and ligands folders')
    for directory in (targets_dir, ligands_dir):
        if not _list_pdb_files(directory):
            logging.error(
                'Failed to find PDB files in {0} directory'.format(directory))
            sys.exit()

    logging.info('Checking input files are in PDB format')
    for protein in _list_pdb_files(targets_dir):
        if not parse_pdb(os.path.join(targets_dir, protein)):
            logging.error('File {0} not in PDB format'.format(protein))
            sys.exit()
    for ligand in _list_pdb_files(ligands_dir):
        if not parse_pdb(os.path.join(ligands_dir, ligand)):
            logging.error('File not in PDB format {0}'.format(ligand))
            sys.exit()

    if minimization is True:
        logging.info('Minimizing ligands with obminimize')
        try:
            for ligand in os.listdir(ligands_dir):
                ligand_path = os.path.join(ligands_dir, ligand)
                minimized_path = os.path.join(ligands_dir, 'min_' + ligand)
                # The command uses bare file names, so it must run inside the
                # ligands folder; this function never chdir's there itself.
                subprocess.Popen(
                    'obminimize -o pdb {0} > min_{0}'.format(ligand),
                    shell=True,
                    stdout=stdout,
                    cwd=ligands_dir).wait()
                # Replace the original ligand with its minimised version
                os.remove(ligand_path)
                os.rename(minimized_path, ligand_path)
        except Exception:
            # Best effort: docking continues with whatever ligands are left,
            # but keep the traceback so the cause is not lost.
            logging.exception('Failed to minimize ligands')

    dockit_vina(param_dict, root_dir, app_dir, ligands_dir, targets_dir,
                stdout)

    logging.info('Parsing results and writting output as CSV')
    get_results()
# my_main: train and evaluate a RowColumnAttention classifier for one
# experiment configuration (all hyper-parameters arrive as arguments).
#
# What the body does, in order:
#   * seeds torch / numpy / random with random_seed and forces deterministic
#     cuDNN; picks "cuda" when available, else "cpu";
#   * loads the train/test splits from <cwd>/cifar-100-python/ via load_data()
#     and prep_data(); N_CLASSES is fixed to 100;
#   * builds the model, an Adam optimizer (lr) and a CrossEntropyLoss;
#   * calls get_results() and reads <cwd>/experiments.xlsx to initialise
#     acc_test_best; on any failure falls back to 0;
#   * for each of n_epochs: shuffles the sample indices, trains on mini-batches
#     of batch_size, then evaluates on the test split under torch.no_grad();
#   * saves the state dict to "PixelAttentionv2_cifar100.pt" whenever the epoch's
#     test accuracy exceeds acc_test_best, and tracks this run's best epoch;
#   * logs per-epoch loss/accuracy with ex.log_scalar and stores the run summary
#     (best epoch, losses, accuracies, elapsed minutes, epochs done) in ex.info.
#
# NOTE(review): acc_test_best is initialised with min() over the previous
#   "test acc" values, i.e. the *worst* earlier accuracy, while the later
#   comment says a model is only saved when it beats all other experiments
#   (and the inline comment speaks of test loss) - probably should be max();
#   confirm before changing.
# NOTE(review): the bare `except:` around get_results()/read_excel also traps
#   KeyboardInterrupt - confirm that is acceptable.
# NOTE(review): the original indentation is lost in this view; the position of
#   the ex.info updates (inside or after the epoch loop) could not be confirmed.
def my_main(random_seed, lr, hidden_dim1, hidden_dim2, n_heads, n_layers, n_epochs, batch_size, dropout, position_embedding_version, segment_embedding_version, attention_version, attention_indv_channels_merge_mode, use_cls): # %% --------------------------------------- Set-Up ---------------------------------------------------------------- device = torch.device("cuda" if torch.cuda.is_available() else "cpu") torch.manual_seed(random_seed) np.random.seed(random_seed) random.seed(random_seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False N_CLASSES = 100 ONLY_BATCH_IN_GPU = True COLOR = True DATA_DIR = os.getcwd() + "/cifar-100-python/" # %% -------------------------------------- Data Prep -------------------------------------------------------------- x_train, y_train = load_data(DATA_DIR + "train") x_train, p_train, s_train, y_train = prep_data( x_train, y_train, only_batch_in_gpu=ONLY_BATCH_IN_GPU, use_cls=use_cls) x_test, y_test = load_data(DATA_DIR + "test") x_test, p_test, s_test, y_test = prep_data( x_test, y_test, only_batch_in_gpu=ONLY_BATCH_IN_GPU, use_cls=use_cls) # %% -------------------------------------- Training Prep ---------------------------------------------------------- model = RowColumnAttention(COLOR, use_cls, x_train.shape[1], hidden_dim1, hidden_dim2, n_layers, n_heads, N_CLASSES, dropout, position_embedding_version, segment_embedding_version, attention_version, attention_indv_channels_merge_mode).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=lr) criterion = nn.CrossEntropyLoss() # %% -------------------------------------- Training Loop ---------------------------------------------------------- print( "\n ------------ Doing run number {} with configuration ---------------" .format(ex.current_run._id)) print(ex.current_run.config) try: # Gets the best result so far, so that we only save the model if the result is better (test loss in this case) get_results() results_so_far = 
pd.read_excel(os.getcwd() + "/experiments.xlsx") acc_test_best = min(results_so_far["test acc"].values) except: acc_test_best = 0 print("No results so far, will save the best model out of this run") best_epoch, loss_best, acc_best = 0, 1000, 0 inds_list = list(range(len(x_train))) print("Starting training loop...") start = time() for epoch in range(n_epochs): random.shuffle(inds_list) loss_train, train_steps = 0, 0 model.train() total = len(x_train) // batch_size + 1 pred_labels, real_labels = [], [ ] # Need to get the real labels because we are shuffling after each epoch with tqdm(total=total, desc=f"Epoch {epoch}") as pbar: for inds in [ inds_list[batch * batch_size:(batch + 1) * batch_size] for batch in range(len(inds_list) // batch_size + 1) ]: if not inds: break optimizer.zero_grad() if ONLY_BATCH_IN_GPU: logits = model(x_train[inds].to(device), p_train[inds].to(device), s_train[inds].to(device)) loss = criterion(logits, y_train[inds].to(device)) else: logits = model(x_train[inds], p_train[inds], s_train[inds]) loss = criterion(logits, y_train[inds]) loss.backward() optimizer.step() loss_train += loss.item() train_steps += 1 pbar.update(1) pbar.set_postfix_str( f"Training Loss: {loss_train / train_steps:.5f}") pred_labels += list( np.argmax(logits.detach().cpu().numpy(), axis=1).reshape(-1)) real_labels += list(y_train[inds].cpu().numpy().reshape(-1)) acc_train = 100 * accuracy_score(np.array(real_labels), np.array(pred_labels)) with torch.no_grad(): loss_test, test_steps = 0, 0 model.eval() total = len(x_test) // batch_size + 1 pred_labels = [] with tqdm(total=total, desc=f"Epoch {epoch}") as pbar: for batch in range(len(x_test) // batch_size + 1): inds = slice(batch * batch_size, (batch + 1) * batch_size) if len(x_test[inds]) == 0: break if ONLY_BATCH_IN_GPU: logits = model(x_test[inds].to(device), p_test[inds].to(device), s_test[inds].to(device)) loss = criterion(logits, y_test[inds].to(device)) else: logits = model(x_test[inds], p_test[inds], s_test[inds]) 
loss = criterion(logits, y_test[inds]) loss_test += loss.item() test_steps += 1 pbar.update(1) pbar.set_postfix_str( f"Testing Loss: {loss_test / test_steps:.5f}") pred_labels += list( np.argmax(logits.cpu().numpy(), axis=1).reshape(-1)) acc_test = 100 * accuracy_score(y_test.cpu().numpy(), np.array(pred_labels)) print( "Epoch {} | Train Loss {:.5f}, Train Acc {:.2f} - Test Loss {:.5f}, Test Acc {:.2f}" .format(epoch, loss_train / train_steps, acc_train, loss_test / test_steps, acc_test)) # Only saves the model if it's better than the models from all of the other experiments if acc_test > acc_test_best: torch.save(model.state_dict(), "PixelAttentionv2_cifar100.pt") print("A new model has been saved!") acc_test_best = acc_test if acc_test > acc_best: best_epoch, loss_best, acc_best = epoch, loss_test / test_steps, acc_test # To keep track of the metrics during the training process on metrics.json ex.log_scalar("training loss", loss_train / train_steps, epoch) ex.log_scalar("training acc", acc_train, epoch) ex.log_scalar("testing loss", loss_test / test_steps, epoch) ex.log_scalar("testing acc", acc_test, epoch) # To save the best results of this run to info.json. This is used by get_results() to generate the spreadsheet ex.info["epoch"], ex.info["test loss"], ex.info[ "test acc"] = best_epoch, loss_best, acc_best ex.info["train loss"], ex.info[ "train acc"] = loss_train / train_steps, acc_train ex.info["time (min)"], ex.info["actual epochs"] = ( time() - start) / 60, epoch + 1
import get_results
import sqlite3 as lite
import csv
import thorobred as tb
import sys

# Output CSV path comes from the first command-line argument
filename = sys.argv[1]
con = lite.connect('TB.db')
myfile = open(filename, 'wb')
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)

#church hill downs and gulfstream park last month
CD_results = get_results.get_results('CD')
GP_results = get_results.get_results('GP')
AP_results = get_results.get_results('AP')
PIM_results = get_results.get_results('PIM')
cur = con.cursor()


#do the same for other results
def write_train(results):
    """Look up the HORSE.ID of the horse named in each race row.

    Each row is indexed positionally: ``race[1]`` is split on '-' as a date
    and ``race[3]`` is used as the horse name.
    NOTE(review): ``date`` and the fetched row ``x`` are not used yet - the
    function looks unfinished in this view.
    """
    for race in results:
        date = race[1].split('-')
        #writes track date race number
        # Bound parameter instead of str.format: names containing quotes no
        # longer break (or inject into) the statement.
        cur.execute('SELECT ID FROM HORSE WHERE NAME=?', (race[3],))
        x = cur.fetchone()
def main(expdir, plot_speakers, plot_types):
    '''main function

    Plot LOWESS-smoothed result curves for the experiment in expdir.

    Every per-speaker result is read as rows whose column 0 is the number of
    examples and column 1 the score.  One 'averages' figure shows the fit
    over all speakers plus each speaker's f1 fit.

    Args:
        expdir: experiment directory handed to get_results
        plot_speakers: if true, also draw one figure per speaker
        plot_types: if true, load and plot 'precision' and 'recal' next
            to 'f1'
    '''

    #lowess parameters
    def smooth(y, x):
        '''lowess fit of y against x; x gets ~1e-12 of random jitter'''
        return lowess(y,
                      x + 1e-12 * np.random.randn(len(x)),
                      frac=1.0 / 3,
                      it=0,
                      delta=1.0,
                      return_sorted=True)

    typestyle = {'f1': '-', 'precision': '--', 'recal': ':'}

    results = {'f1': get_results(expdir, 'f1')}
    if plot_types:
        results['precision'] = get_results(expdir, 'precision')
        results['recal'] = get_results(expdir, 'recal')

    #fix the speaker order once; it is reused for every concatenation below
    speakers = list(results['f1'].keys())

    #do LOWESS smooting per speaker
    #(dict views are turned into lists: numpy cannot index or concatenate a
    #view on Python 3)
    numexamples = {
        s: np.array(list(r.values()))[:, 0]
        for s, r in results['f1'].items()
    }
    results = {
        t: {s: np.array(list(r.values()))[:, 1]
            for s, r in results[t].items()}
        for t in results
    }

    #lowess fit all the results for all the speakers
    fits = {
        t: {s: smooth(a, numexamples[s])
            for s, a in r.items()}
        for t, r in results.items()
    }

    if plot_speakers:
        #plot all the speakers
        for s in speakers:
            plt.figure(s)
            for t in fits:
                if t != 'f1' and not plot_types:
                    continue
                plt.plot(fits[t][s][:, 0], fits[t][s][:, 1], label=t)
            plt.scatter(numexamples[s], results['f1'][s])
            plt.legend()
            plt.xlabel('# Examples')
            plt.ylabel('Accuracy')

    #concatenate all the results, speaker by speaker so that the x and y
    #arrays are guaranteed to line up
    cnumex = np.concatenate([numexamples[s] for s in speakers])
    conc = {
        t: np.concatenate([r[s] for s in speakers])
        for t, r in results.items()
    }

    #fit all the results
    cfits = {t: smooth(conc[t], cnumex) for t in conc}

    #plot averages
    plt.figure('averages')
    for t in cfits:
        if t != 'f1' and not plot_types:
            continue
        plt.plot(cfits[t][:, 0],
                 cfits[t][:, 1],
                 color='black',
                 linestyle=typestyle[t],
                 linewidth=2,
                 label=t)
    for s in speakers:
        plt.plot(fits['f1'][s][:, 0], fits['f1'][s][:, 1], label=s)
    plt.legend()
    plt.xlabel('# Examples')
    plt.ylabel('Accuracy')

    plt.show()
# run_import: one import cycle - collect projects and results, refresh task
# data and persist project/user information.
#
# Parameters:
#   user_project_list - project ids to process; when falsy, all projects are
#                       fetched with get_all_projects().
#   timestamp         - lower bound handed to get_results(); when falsy it is
#                       read with get_last_timestamp(results_table_name).
#   initial_setup     - truthy: create the database/tables first via
#                       create_database_and_tables(); otherwise the fixed table
#                       names 'projects', 'results', 'tasks', 'users' are used.
# Returns: (new_projects, updated_projects).
#
# What the body does, in order:
#   * configures logging to append to 'import.log' (level WARNING, which is why
#     every progress message is logged with logging.warning);
#   * filters the project list through check_projects(); sys.exit(0) when
#     nothing is left;
#   * get_projects() -> new_projects, updated_projects, project_dict;
#   * get_results(results_table_name, timestamp, 500000) -> changed_projects,
#     which are merged into updated_projects without duplicates, after which
#     new_projects are removed from updated_projects;
#   * get_tasks() for new projects, get_tasks_completed_count() for updated
#     ones, save_projects_psql() and get_users();
#   * runs gc.collect() twice before returning.
# NOTE(review): the list(set(...)) operations do not preserve the order of
#   updated_projects - confirm no caller depends on ordering.
def run_import(user_project_list, timestamp, initial_setup): print(os.getcwd()) # print('start run import: ', get_memory_consumption()) logging.basicConfig(filename='import.log', level=logging.WARNING, format='%(asctime)s %(levelname)-8s %(message)s', datefmt='%m-%d %H:%M:%S', filemode='a') logging.warning('>>> start of script import.py') # check if the user provided an argument to set up the database if initial_setup: print('start the initial setup') logging.warning('start the initial setup') project_table_name, results_table_name, task_table_name = create_database_and_tables( ) users_table_name = 'users' else: # define some variables that are related to the configuration of the psql database project_table_name = 'projects' results_table_name = 'results' task_table_name = 'tasks' users_table_name = 'users' # get list of all project_ids if no list of projects is provided if not user_project_list: project_list = get_all_projects() project_list = check_projects(project_list) print('got all projects from firebase: ', project_list) logging.warning('got all projects from firebase: %s' % project_list) else: print('user provided project ids: ', user_project_list) logging.warning('user provided project ids: %s' % user_project_list) project_list = check_projects(user_project_list) if not project_list: print('there are no projects to process. stop here.') logging.warning('there are no projects to process. 
stop here.') sys.exit(0) # get project information new_projects, updated_projects, project_dict = get_projects( project_list, project_table_name) print('new projects in firebase: ', new_projects) logging.warning('new projects in firebase: %s' % new_projects) print('updated projects in firebase: ', updated_projects) logging.warning('updated projects in firebase: %s' % updated_projects) logging.warning('get_projects() was successfull') # check if the user provided a timestamp for the processing if timestamp: print('use timestamp provided by user') logging.warning('use timestamp provided by user') pass else: # print('get timestamp from database') timestamp = get_last_timestamp(results_table_name) # timestamp = 1509637220000 # timestamp = int((time.time() - 3600)*1000) # this creates a timestamp representing the last 1 hour, in milliseconds print(timestamp) # get latest results, retrieve a list object with project id's of latest results changed_projects = get_results(results_table_name, timestamp, 500000) print('projects with new results: ', changed_projects) logging.warning('get_results() was successfull') # add the projects which need a update based on results to the ones based on contr. 
| progres | state # basidally merge the two lists with changed projects and remove the duplicates # merge updated projects from get_projects and get_results updated_projects = updated_projects + list( set(changed_projects) - set(updated_projects)) # remove new projects from updated projects list # when importing new projects, we already get the latest completed count updated_projects = list(set(updated_projects) - set(new_projects)) print('new projects: ', new_projects) logging.warning('new projects: %s' % new_projects) print('updated projects: ', updated_projects) logging.warning('updated projects: %s' % updated_projects) # get tasks for new projects get_tasks(new_projects, task_table_name) logging.warning('get_tasks() was successfull') # update projects that need an update # get latest completed count for projects that need an update get_tasks_completed_count(updated_projects, task_table_name) logging.warning('get_tasks_completed_count() was successfull') # save project data in psql database save_projects_psql(project_table_name, project_dict) print('saved project info to psql') logging.warning('saved project info to psql') # get user information get_users(users_table_name) logging.warning('get_users() was successfull') logging.warning('<<< end of script import.py') # print('after garbage collection: ', get_memory_consumption()) # garbage collection for i in range(2): n = gc.collect() return new_projects, updated_projects
def main(result, sweepfile, expdir):
    '''main function

    Compare the experiments of a sweep: one LOWESS curve per sweep section,
    drawn per speaker and once more over all speakers combined.

    Args:
        result: name of the result to read, handed to get_results
        sweepfile: config file whose sections name the experiments
        expdir: directory holding one sub-directory per sweep section
    '''

    palette = [
        'red', 'blue', 'cyan', 'green', 'yellow', 'magenta', 'purple', 'pink',
        'gold', 'navy', 'olive', 'grey'
    ]
    dashes = ['-']
    #alternative look: palette = ['black'], dashes = ['-', '--', ':', '-.']

    def line_look(index):
        '''color and linestyle of the index-th experiment (both cycle)'''
        return {
            'color': palette[index % len(palette)],
            'linestyle': dashes[index % len(dashes)],
        }

    def smooth(y, x):
        '''lowess fit of y against x; x gets ~1e-12 of random jitter'''
        return lowess(y,
                      x + 1e-12 * np.random.randn(len(x)),
                      frac=2.0 / 3,
                      it=0,
                      delta=1.0,
                      return_sorted=True)

    def sorted_fit(points):
        '''order the rows by column 0, then fit column 1 against column 0'''
        ordered = points[np.argsort(points[:, 0], axis=0), :]
        return smooth(ordered[:, 1], ordered[:, 0])

    def label_axes():
        '''legend and axis labels shared by every figure'''
        plt.legend(loc='lower right')
        plt.xlabel('# Examples')
        plt.ylabel('Accuracy')

    #read the sweepfile
    sweep = ConfigParser()
    sweep.read(sweepfile)
    expnames = sweep.sections()

    plot_speakers = True

    #read all the results
    results = [
        get_results(os.path.join(expdir, section), result)
        for section in expnames
    ]

    if plot_speakers:
        for speaker in results[0]:
            plt.figure(speaker)
            for i, experiment in enumerate(results):
                if speaker in experiment:
                    curve = sorted_fit(np.array(experiment[speaker]))
                    plt.plot(curve[:, 0],
                             curve[:, 1],
                             label=expnames[i],
                             **line_look(i))
            label_axes()

    #concatenate, sort and smooth the results of every experiment;
    #experiments without any data are kept as None so indices still line up
    curves = []
    for experiment in results:
        merged = np.array(
            list(itertools.chain.from_iterable(experiment.values())))
        curves.append(sorted_fit(merged) if merged.size else None)

    plt.figure('result')
    for i, curve in enumerate(curves):
        if curve is not None:
            plt.plot(curve[:, 0],
                     curve[:, 1],
                     label=expnames[i],
                     **line_look(i))
    label_axes()

    plt.show()
# main(expdirs, result): compare several experiment directories by plotting
# LOWESS-smoothed curves of the requested result.
#
# Parameters:
#   expdirs - experiment directories; each is normalised with os.path.normpath
#             and its basename is used as the legend label.
#   result  - name of the result to read, handed to get_results(expdir, result).
#
# What the body does, in order:
#   * fixes the look (black lines, four line styles, tick/label/legend kwargs);
#   * reads one nested result mapping per experiment directory: speaker ->
#     experiment -> row, where column 0 is used as x and column 1 as y;
#   * with remove_uncomplete: keeps only speakers present in every directory
#     and, per speaker, only experiments present in every directory (a speaker
#     with no common experiment is dropped everywhere);
#   * with plot_speakers: one figure per speaker with one curve per directory;
#   * finally one 'result' figure fitted on all rows of each directory, then
#     plt.show().
#
# NOTE(review): `result` is reused as a loop variable after get_results() has
#   consumed the parameter; harmless here but easy to trip over.
# NOTE(review): np.array(result[speaker].values()) only yields a 2-D array on
#   Python 2; on Python 3 a dict view must be wrapped in list() first - confirm
#   the interpreter this file targets.
# NOTE(review): the original indentation is lost in this view; the tick/legend
#   calls are presumably issued once per figure - confirm.
def main(expdirs, result): '''main function''' expdirs = [os.path.normpath(expdir) for expdir in expdirs] #colorlist = ['red', 'blue', 'cyan', 'green', 'yellow', 'magenta', # 'purple', 'pink', 'gold', 'navy', 'olive', 'grey'] #linestyles = ['-'] colorlist = ['black'] linestyles = ['-', '--', ':', '-.'] #colorlist = ['green', 'dimgrey', 'darkorange'] #linestyles = ['-'] plot_speakers = True remove_uncomplete = True #tick parameters tick_params = { 'size': 'x-large', #'color': 'dimgrey' } #axis properties ax_params = {'color': 'black'} #label properties label_params = {'color': 'black', 'size': 'x-large'} #legend properties legend_params = { 'loc': 'lower right', 'edgecolor': 'black', 'fontsize': 'x-large' } lcolor = 'black' #lowess parameters smooth = lambda y, x: lowess(y, x + 1e-12 * np.random.randn(len(x)), frac=2.0 / 3, it=0, delta=1.0, return_sorted=True) #read all the results results = [get_results(expdir, result) for expdir in expdirs] expnames = [os.path.basename(expdir) for expdir in expdirs] #remove experiments that are not performed in all experiments if remove_uncomplete: speakers = set(results[0].keys()) for result in results[1:]: speakers = speakers & set(result.keys()) results = [{s: result[s] for s in speakers} for result in results] for speaker in speakers: experiments = set(results[0][speaker].keys()) for result in results[1:]: experiments = experiments & set(result[speaker].keys()) if not experiments: for result in results: del result[speaker] else: for result in results: result[speaker] = { e: result[speaker][e] for e in experiments } if plot_speakers: for speaker in results[0]: plt.figure(speaker) for i, result in enumerate(results): if speaker not in result: continue sort = np.array(result[speaker].values()) sort = sort[np.argsort(sort[:, 0], axis=0), :] fit = smooth(sort[:, 1], sort[:, 0]) plot = plt.plot(fit[:, 0], fit[:, 1], color=colorlist[i % len(colorlist)], linestyle=linestyles[i % len(linestyles)], label=expnames[i]) 
plt.yticks(**tick_params) plt.xticks(**tick_params) plt.axis(**ax_params) l = plt.legend(**legend_params) for text in l.get_texts(): text.set_color(lcolor) plt.xlabel('# Examples', **label_params) plt.ylabel('Accuracy', **label_params) #concatenate all the results concatenated = [ np.array( list( itertools.chain.from_iterable( [r.values() for r in result.values()]))) for result in results ] #sort the concatenated data sort = [c[np.argsort(c[:, 0], axis=0), :] for c in concatenated] #smooth all the results fit = [smooth(s[:, 1], s[:, 0]) for s in sort] plt.figure('result') for i, f in enumerate(fit): plt.plot(f[:, 0], f[:, 1], color=colorlist[i % len(colorlist)], linestyle=linestyles[i % len(linestyles)], label=expnames[i]) plt.yticks(**tick_params) plt.xticks(**tick_params) plt.axis(**ax_params) l = plt.legend(**legend_params) for text in l.get_texts(): text.set_color(lcolor) plt.xlabel('# Examples', **label_params) plt.ylabel('Accuracy', **label_params) plt.show()
def main(expdirs, result):
    '''Plot smoothed result curves for several experiments and save them.

    A figure named 'result <result>' that pools the points of all speakers
    is drawn with one curve per experiment directory, saved as
    ``result_<result>.pdf`` in the first directory of ``expdirs`` (as given,
    before normalisation) and then displayed. Per-speaker figures are only
    drawn when the local switch ``plot_speakers`` is enabled.

    Args:
        expdirs: the experiment directories to compare; must be indexable,
            because the figure is saved in ``expdirs[0]``.
        result: the name of the result to plot, passed unchanged to
            ``get_results``. 'word_f1' and 'speakerperformance' select a
            matching y-axis label; anything else is labelled 'f1'.
    '''

    #delete the line matplotlib.use('Agg') if you want to show the plots
    #(that call is not part of this function)

    toplot = result
    resultdir = expdirs
    expdirs = [os.path.normpath(expdir) for expdir in expdirs]

    #curve i uses colorlist[i % len(colorlist)] and
    #linestyles[i % len(linestyles)]
    #alternative palettes:
    #colorlist = ['red', 'blue', 'cyan', 'green', 'yellow', 'magenta',
    #             'purple', 'pink', 'gold', 'navy', 'olive', 'grey']
    #linestyles = ['-']
    #colorlist = ['green', 'dimgrey', 'darkorange']
    #linestyles = ['-']
    colorlist = ['black']
    linestyles = ['-', '--', ':', '-.', (0, (5, 10))]

    plot_speakers = False  # True
    remove_uncomplete = True  #False

    #tick parameters
    tick_params = {
        'size': 'x-large',
        #'color': 'dimgrey'
    }

    #axis properties
    ax_params = {'color': 'black'}

    #label properties
    label_params = {'color': 'black', 'size': 'x-large'}

    #legend properties
    legend_params = {
        'loc': 'lower right',
        'edgecolor': 'black',
        'fontsize': 'x-large'
    }
    lcolor = 'black'

    def smooth1(y, x):
        '''lowess fit of y against x, used as rows of (x, fitted y)'''
        #a tiny random jitter is added to x, presumably to separate
        #identical x values before fitting -- TODO confirm
        return lowess(
            y, x + 1e-12 * np.random.randn(len(x)),
            frac=2.0 / 3,
            it=0,
            delta=1.0,
            return_sorted=True)

    def smooth2(y, x, step_size=0.1, width=50):
        '''Gaussian-weighted moving average of y over a regular grid on x

        The grid points are spaced step_size apart between min(x) and
        max(x); width is the standard deviation of the Gaussian weights.
        Returns an array with rows of (grid point, weighted average).
        '''

        def gaussian(values, amp=1, mean=0, sigma=1):
            '''Gaussian function, used to weight the samples'''
            return amp * np.exp(-(values - mean)**2 / (2 * sigma**2))

        bin_centers = np.arange(
            np.min(x), np.max(x) - 0.5 * step_size, step_size
        ) + 0.5 * step_size
        bin_avg = np.zeros(len(bin_centers))
        for index, bin_center in enumerate(bin_centers):
            weights = gaussian(x, mean=bin_center, sigma=width)
            bin_avg[index] = np.average(y, weights=weights)

        #NOTE(review): the rows are ordered by increasing average and not by
        #x, so the plotted curve never decreases in y -- confirm that this
        #is intended
        order = np.argsort(bin_avg)
        return np.column_stack((bin_centers[order], bin_avg[order]))

    def sort_rows(points):
        '''order the rows of a 2-D array by their first column'''
        return points[np.argsort(points[:, 0], axis=0), :]

    def style_axes(xlabel, ylabel):
        '''apply the shared tick, axis, legend and label styling to the
        current figure'''
        plt.yticks(**tick_params)
        plt.xticks(**tick_params)
        plt.axis(**ax_params)
        legend = plt.legend(**legend_params)
        for text in legend.get_texts():
            text.set_color(lcolor)
        plt.xlabel(xlabel, **label_params)
        plt.ylabel(ylabel, **label_params)

    #read all the results; each one is indexed as [speaker][experiment]
    results = [get_results(expdir, result) for expdir in expdirs]
    expnames = [os.path.basename(expdir) for expdir in expdirs]

    #legend labels are assigned by position; experiments beyond the
    #hard-coded names fall back to their directory name instead of raising
    #an IndexError
    labelnames = ['pccn', 'rccn', 'nmf', 'encoder-decoder']
    #labelnames = ['pccn-multi', 'rccn-multi', 'pccn', 'rccn']
    labels = [
        labelnames[i] if i < len(labelnames) else expname
        for i, expname in enumerate(expnames)]

    #smooth1 (lowess) is kept as an alternative smoother
    smooth = smooth2

    pickylabel = 'f1'
    if toplot == 'word_f1':
        pickylabel = 'word_f1'
    elif toplot == 'speakerperformance':
        pickylabel = '% correctly decoded speakers'

    #only keep the speakers and experiments that occur in every result
    if remove_uncomplete:
        speakers = set(results[0].keys())
        for res in results[1:]:
            speakers &= set(res.keys())
        results = [{s: res[s] for s in speakers} for res in results]
        for speaker in speakers:
            experiments = set(results[0][speaker].keys())
            for res in results[1:]:
                experiments &= set(res[speaker].keys())
            if not experiments:
                #nothing in common for this speaker: drop it everywhere
                for res in results:
                    del res[speaker]
            else:
                for res in results:
                    res[speaker] = {
                        e: res[speaker][e] for e in experiments}

    if plot_speakers:
        for speaker in results[0]:
            plt.figure(speaker)
            for i, res in enumerate(results):
                if speaker not in res:
                    continue
                #list() is required on Python 3, where values() is a view
                #that numpy would wrap in a 0-d object array
                points = sort_rows(np.array(list(res[speaker].values())))
                fit = smooth(points[:, 1], points[:, 0])
                #these curves are deliberately left without a legend label
                plt.plot(
                    fit[:, 0], fit[:, 1],
                    color=colorlist[i % len(colorlist)],
                    linestyle=linestyles[i % len(linestyles)])
                #    label=expnames[i])
            style_axes('# Trainingsvoorbeelden', 'Accuracy')

    #concatenate all the results
    concatenated = [
        np.array(list(itertools.chain.from_iterable(
            [spk.values() for spk in res.values()])))
        for res in results]

    #sort the concatenated data
    ordered = [sort_rows(c) for c in concatenated]

    #smooth all the results
    fits = [smooth(s[:, 1], s[:, 0]) for s in ordered]

    plt.figure('result ' + str(toplot))
    for i, fit in enumerate(fits):
        plt.plot(
            fit[:, 0], fit[:, 1],
            color=colorlist[i % len(colorlist)],
            linestyle=linestyles[i % len(linestyles)],
            label=labels[i])
    style_axes('# Examples', str(pickylabel))

    #save first, so that the message is only printed once the file exists
    plt.savefig(
        os.path.join(resultdir[0], 'result_' + str(toplot) + '.pdf'),
        format='pdf')
    #a single pre-formatted argument prints the same text on Python 2 and 3;
    #the double space matches the output of the former print statement
    print('Figure of result saved in:  %s' % (resultdir[0],))

    plt.show()