def dump_pretrained_emb(word2index, index2word, dump_path):
    iprint('Dumping pretrained embeddings...')
    embeddings = [GloveEmbedding(), KazumaCharEmbedding()]
    E = []
    for i in tqdm(range(len(word2index.keys()))):
        w = index2word[i]
        e = []
        for emb in embeddings:
            e += emb.emb(w, default='zero')
        E.append(e)
    with open(dump_path, 'wt') as f:
        json.dump(E, f)
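# A minimal usage sketch (not part of the original module): it assumes a tiny
# hand-built vocabulary. The real call site in get_all_data() passes
# vocab.word2index / vocab.index2word built from the training data and dumps to
# 'data/embedding{vocab size}.json'.
def _demo_dump_pretrained_emb():
    demo_index2word = {0: 'PAD', 1: 'hello', 2: 'world'}  # illustrative vocab
    demo_word2index = {w: i for i, w in demo_index2word.items()}
    dump_pretrained_emb(demo_word2index, demo_index2word, 'data/embedding3.json')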
def dumpOutput(self, algoName, output):
    """
    Dump output to filesystem (/results)
    """
    fp = os.path.join("..", "results", algoName + ".txt")
    iprint("Dumping output of {} to {} ...".format(algoName, fp))
    with open(fp, "w") as f:
        f.write("# {} {} {} {}\n".format(self.G.name, self.nVertices, self.nEdges, self.k))
        for out in output.T:
            nodeID = out[0]
            cluster = out[1]
            f.write("{} {}\n".format(nodeID, cluster))
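# Sketch of the file dumpOutput produces (the header layout comes straight from
# the write above; the graph name and values are illustrative):
#
#   # myGraph 34 78 2
#   0 1
#   1 0
#   2 1
#   ...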
def __init__(self, args, vocab_size, embed_size, dropout, n_layers=1):
    super(Encoder, self).__init__()
    self.vocab_size = vocab_size
    self.embed_size = embed_size
    self.n_layers = n_layers
    self.dropout = nn.Dropout(dropout)

    # input of shape (batch, seq_len)
    # output (batch, seq_len, embedding_dim)
    self.embedding = nn.Embedding(
        num_embeddings=vocab_size,
        embedding_dim=embed_size,
        padding_idx=PAD_TOKEN
    )
    self.embedding.weight.data.normal_(0, 0.1)

    # input of shape (seq_len, batch, input_size)
    # output of shape (seq_len, batch, num_directions * hidden_size)
    # h_n of shape (num_layers * num_directions, batch, hidden_size)
    self.gru = nn.GRU(
        input_size=embed_size,
        hidden_size=embed_size,
        num_layers=n_layers,
        bidirectional=True
    )

    if args["load_embedding"]:
        with open('data/embedding{}.json'.format(vocab_size)) as f:
            E = json.load(f)
        new = self.embedding.weight.data.new
        self.embedding.weight.data.copy_(new(E))
        self.embedding.weight.requires_grad = True

    if args["fix_embedding"]:
        self.embedding.weight.requires_grad = False

    iprint('Encoder embedding requires_grad {}'.format(self.embedding.weight.requires_grad))
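# A minimal construction sketch (assumption: the surrounding class is
# Encoder(nn.Module), as the super() call suggests; the sizes are illustrative
# and the args keys mirror the flags defined in get_args()).
def _demo_build_encoder():
    demo_args = {"load_embedding": 0, "fix_embedding": 0}
    encoder = Encoder(args=demo_args, vocab_size=1000, embed_size=400, dropout=0.2)
    # The embedding maps (batch, seq_len) token ids to (batch, seq_len, embed_size);
    # the bidirectional GRU then consumes (seq_len, batch, embed_size).
    return encoder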
def make_clusters(self, params):
    """
    Perform spectral clustering.
    """
    # Compute the unnormalized Laplacian
    iprint("Computing laplacian of type {}...".format(params["L"]))
    L = self.compute_laplacian(params["L"])

    # Compute the eigenvalues and corresponding eigenvectors
    iprint("Computing eigens ...")
    eValues, eVectors = self.compute_eigen(L, params["eigen_norm"], params["L"], params["tol"])
    dprint("evalues = {}".format(eValues))

    # K-means clustering on the eigenvector matrix
    iprint("Performing kmean ...")
    clusters = self.kmean(eVectors)
    dprint("Computed labels: {}".format(np.array(clusters)))

    # For each node, return the associated cluster
    nodes = np.array(self.G.nodes())
    return np.stack((nodes, clusters))
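# For reference, a standalone sketch of the unnormalized Laplacian L = D - A
# referenced above (self.compute_laplacian is the real implementation; this
# helper is illustrative only and assumes self.G is a networkx graph).
def _demo_unnormalized_laplacian(G):
    import numpy as np
    import networkx as nx
    A = nx.to_numpy_array(G)      # adjacency matrix
    D = np.diag(A.sum(axis=1))    # degree matrix
    return D - A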
def get_args():
    parser = argparse.ArgumentParser(description='TRADE Multi-Domain DST')

    # Training Setting
    parser.add_argument('-ds', '--dataset', help='dataset', required=False, default="multiwoz")
    parser.add_argument('-t', '--task', help='Task Number', required=False, default="dst")
    parser.add_argument('-path', '--path', help='path of the file to load', required=False)
    parser.add_argument('-sample', '--sample', help='Number of Samples', required=False, default=None)
    parser.add_argument('-patience', '--patience', help='', required=False, default=6, type=int)
    parser.add_argument('-es', '--earlyStop', help='Early Stop Criteria, BLEU or ENTF1', required=False, default='BLEU')
    parser.add_argument('-all_vocab', '--all_vocab', help='', required=False, default=1, type=int)
    parser.add_argument('-imbsamp', '--imbalance_sampler', help='', required=False, default=0, type=int)
    parser.add_argument('-data_ratio', '--data_ratio', help='', required=False, default=100, type=int)
    parser.add_argument('-um', '--unk_mask', help='mask out input token to UNK', type=int, required=False, default=1)
    parser.add_argument('-bsz', '--batch', help='Batch_size', required=False, type=int)
    parser.add_argument('-ep', '--epoch', help='Number of epochs', required=False, type=int, default=200)
    parser.add_argument('-cu', '--cuda', help='Cuda device number', required=False, type=str, default='0')
    parser.add_argument('-pi', '--print_iter', help='Every n iterations to print loss values', required=False, type=int, default=100)

    # Testing Setting
    parser.add_argument('-rundev', '--run_dev_testing', help='', required=False, default=0, type=int)
    parser.add_argument('-viz', '--vizualization', help='vizualization', type=int, required=False, default=0)
    parser.add_argument('-gs', '--genSample', help='Generate Sample', type=int, required=False, default=0)
    parser.add_argument('-evalp', '--evalp', help='evaluation period', required=False, default=1)
    parser.add_argument('-an', '--addName', help='An add name for the save folder', required=False, default='')
    parser.add_argument('-eb', '--eval_batch', help='Evaluation Batch_size', required=False, type=int, default=0)

    # Model architecture
    parser.add_argument('-gate', '--use_gate', help='', required=False, default=1, type=int)
    parser.add_argument('-le', '--load_embedding', help='', required=False, default=0, type=int)
    parser.add_argument('-femb', '--fix_embedding', help='', required=False, default=0, type=int)

    # Model Hyper-Parameters
    parser.add_argument('-dec', '--decoder', help='decoder model', required=False)
    parser.add_argument('-hdd', '--hidden_size', help='Hidden size', required=False, type=int, default=400)
    parser.add_argument('-lr', '--learning_rate', help='Learning Rate', required=False, type=float)
    parser.add_argument('-dr', '--dropout', help='Drop Out', required=False, type=float)
    parser.add_argument('-lm', '--limit', help='Word Limit', required=False, default=-10000)
    parser.add_argument('-clip', '--clip', help='gradient clipping', required=False, default=10, type=int)
    parser.add_argument('-tfr', '--teacher_forcing_ratio', help='teacher_forcing_ratio', type=float, required=False, default=0.5)
    # parser.add_argument('-l', '--layer', help='Layer Number', required=False)

    # Unseen Domain Setting
    parser.add_argument('-l_ewc', '--lambda_ewc', help='regularization term for EWC loss', type=float, required=False, default=0.01)
    parser.add_argument('-fisher_sample', '--fisher_sample', help='number of sample used to approximate fisher mat', type=int, required=False, default=0)
parser.add_argument("--all_model", action="store_true") parser.add_argument("--domain_as_task", action="store_true") parser.add_argument('--run_except_4d', help='', required=False, default=1, type=int) parser.add_argument("--strict_domain", action="store_true") parser.add_argument('-exceptd', '--except_domain', help='', required=False, default="", type=str) parser.add_argument('-onlyd', '--only_domain', help='', required=False, default="", type=str) args = vars(parser.parse_args()) if args['load_embedding']: args['hidden'] = 400 iprint( 'Using hidden size = 400 for pretrained word embedding (300 + 100)...', msg_type=1) if args['fix_embedding']: args['addName'] += 'FixEmb' if args['except_domain'] != '': args['addName'] += 'Except' + args['except_domain'] if args['only_domain'] != '': args['addName'] += 'Only' + args['only_domain'] # print args for double checking args_formatted = pprint.pformat(args, indent=4) iprint('Training arguments: \n' + args_formatted) return args
def cli()->None: ut.banner() prs = arp.ArgumentParser() sub_prs = prs.add_subparsers(dest ="command") aa = prs.add_argument run_prs = sub_prs.add_parser("run") ana_prs = sub_prs.add_parser("analyze") run_aa = run_prs.add_argument ana_aa = ana_prs.add_argument run_aa("-z","--input_data",required = True) run_aa("-mp","--model_parameters",required = True) run_aa("-t0","--initial_time", type = int, default = 0, ) run_aa("-o", "--out_dir", default = "/tmp", ) run_aa("-tag","--tag", default = "", ) ana_aa("-r","--result", type = str, ) ana_aa("-a","--animate", action = "store_true", default = False, ) ana_aa("-o","--out_dir", default = "/tmp", ) ana_aa("-img","--images", nargs = "+", ) ana_aa("-tag", "--tag", default ="", ) ana_aa("-ms","--marker_size", default = 5.0, type = float ) ana_aa("-d","--delay", default = 10, type = int, ) ana_aa("-kf","-keep_frames", default = False, type = bool, action ="store_true", ) args = prs.parse_args() if args.command == "run": iprint("Reading model parameters from : {}".format(args.model_parameters)) with open(args.model_parameters) as f: model_parameters = yaml.load(f, Loader=yaml.FullLoader) for p,v in model_parameters.items(): if isinstance(v,str): model_parameters[p] = eval(v) if isinstance(v,dict): for sp,sv in v.items(): if isinstance(sv,str): v[sp] = eval(sv) iprint("Reading observational data from : {}".format(args.input_data)) obs_data = pd.read_csv(args.input_data, sep = "\t", header = 0, index_col = 0, ) iprint("Running celltracker") results = run_celltrack(obs = obs_data, model_params = model_parameters, t0 = args.initial_time, ) iprint("Completed celltracker") tag = (args.tag + "-" if args.tag != "" else args.tag) out_fn = osp.join(args.out_dir, tag + "cell-track-res.tsv", ) results.to_csv(out_fn, sep = "\t", ) iprint("Saved results to : {}".format(out_fn)) if args.command == "analyze": iprint("Entering analysis module") iprint("Using results file : {}".format(args.result)) results = pd.read_csv(args.result, sep = "\t", header = 0, index_col = 0, ) if args.tag == "": tag = osp.basename(args.results).split("-")[0] + "-" if tag == "cell": tag = "" else: tag = args.tag + "-" if args.animate: iprint("Animating results") from sys import platform if platform.lower() != "linux": eprint("OS not supported for animation") else: if osp.isdir(args.images[0]): image_ext = ["png", "tiff", "tif", "jpg", "jpeg", "gif", "bmp"] is_image = lambda x : x.split(".")[-1] in image_ext img_pths = list(filter(is_image,os.listdir(args.images[0]))) img_pths = [osp.join(args.images[0],p) for p in img_pths] else: img_pths = args.images img_pths.sort() iprint("Initating animation") ut.animate_trajectories(results, images = img_pths, out_dir = args.out_dir, tag = tag, marker_size = args.marker_size, delay = args.delay, save_frames = args.keep_frames, ) iprint("Completed animation. Results saved to : {}".format(args.out_dir))
def get_all_data(args, training=True, batch_size=100) -> Tuple[DataLoader, DataLoader, DataLoader]:
    # evaluation batch size
    eval_batch = args["eval_batch"] if args["eval_batch"] else batch_size

    # pickle file path
    if args['path']:
        saving_folder_path = args['path']
    else:
        saving_folder_path = 'save/{}-{}-{}-{}/'.format(args["decoder"], args["addName"], args['dataset'], args['task'])
    iprint('Path to save data: ' + saving_folder_path)
    if not os.path.exists(saving_folder_path):
        os.makedirs(saving_folder_path)

    # read domain-slot pairs
    ontology = json.load(open(FILE_ONTOLOGY, 'r'))
    all_slots = get_slot_info(ontology)

    # vocab
    vocab_name = 'vocab-all.pkl' if args["all_vocab"] else 'vocab-train.pkl'
    mem_vocab_name = 'mem-vocab-all.pkl' if args["all_vocab"] else 'mem-vocab-train.pkl'

    # if vocab files exist, read them in, otherwise create new ones
    if os.path.exists(saving_folder_path + vocab_name) and os.path.exists(saving_folder_path + mem_vocab_name):
        iprint('Loading saved vocab files...')
        with open(saving_folder_path + vocab_name, 'rb') as handle:
            vocab = pickle.load(handle)
        with open(saving_folder_path + mem_vocab_name, 'rb') as handle:
            mem_vocab = pickle.load(handle)
    else:
        vocab = Vocab()
        vocab.index_words(all_slots, 'slot')
        mem_vocab = Vocab()
        mem_vocab.index_words(all_slots, 'slot')

    if training:
        pair_train, train_max_len, slot_train, train_dataloader = get_data(
            args=args,
            file=FILE_TRAIN,
            slots=all_slots,
            dataset='train',
            vocab=vocab,
            mem_vocab=mem_vocab,
            training=training,
            batch_size=batch_size,
            shuffle=True
        )
        nb_train_vocab = vocab.n_words
    else:
        pair_train, train_max_len, slot_train, train_dataloader, nb_train_vocab = [], 0, {}, [], 0

    pair_dev, dev_max_len, slot_dev, dev_dataloader = get_data(
        args=args,
        file=FILE_DEV,
        slots=all_slots,
        dataset='dev',
        vocab=vocab,
        mem_vocab=mem_vocab,
        training=training,
        batch_size=eval_batch,
        shuffle=False
    )
    pair_test, test_max_len, slot_test, test_dataloader = get_data(
        args=args,
        file=FILE_TEST,
        slots=all_slots,
        dataset='test',
        vocab=vocab,
        mem_vocab=mem_vocab,
        training=training,
        batch_size=eval_batch,
        shuffle=False
    )

    iprint('Dumping vocab files...')
    with open(saving_folder_path + vocab_name, 'wb') as handle:
        pickle.dump(vocab, handle)
    with open(saving_folder_path + mem_vocab_name, 'wb') as handle:
        pickle.dump(mem_vocab, handle)

    embedding_dump_path = 'data/embedding{}.json'.format(len(vocab.index2word))
    if not os.path.exists(embedding_dump_path) and args["load_embedding"]:
        dump_pretrained_emb(vocab.word2index, vocab.index2word, embedding_dump_path)

    test_4d = []
    if args['except_domain'] != '':
        pair_test_4d, _, _, test_4d = get_data(
            args=args,
            file=FILE_TEST,
            slots=all_slots,
            dataset='dev',
            vocab=vocab,
            mem_vocab=mem_vocab,
            training=training,
            batch_size=eval_batch,
            shuffle=False
        )

    max_word = max(train_max_len, dev_max_len, test_max_len) + 1

    iprint('Read %s pairs train' % len(pair_train))
    iprint('Read %s pairs dev' % len(pair_dev))
    iprint('Read %s pairs test' % len(pair_test))
    iprint('Vocab_size: %s' % vocab.n_words)
    iprint('Vocab_size Training %s' % nb_train_vocab)
    iprint('Vocab_size Belief %s' % mem_vocab.n_words)
    iprint('Max. length of dialog words for RNN: %s' % max_word)
    # iprint('USE_CUDA={}'.format(USE_CUDA))

    # slots_list = [all_slots, slot_train, slot_dev, slot_test]
    slots_dict = {
        'all': all_slots,
        'train': slot_train,
        'val': slot_dev,
        'test': slot_test
    }
    iprint('[Train Set & Dev Set Slots]: Number is {} in total'.format(len(slots_dict['val'])))
    iprint(slots_dict['val'])
    iprint('[Test Set Slots]: Number is {} in total'.format(len(slots_dict['test'])))
    iprint(slots_dict['test'])

    vocabs = [vocab, mem_vocab]

    return train_dataloader, dev_dataloader, test_dataloader, test_4d, vocabs, slots_dict, nb_train_vocab
def read_langs(args, file, slots, dataset, vocab, mem_vocab, training, max_line=None, update_vocab=False) -> (List[Dict], int, List[str]):
    iprint('Reading from {}'.format(file))
    data = []
    max_resp_len = 0
    max_value_len = 0
    domain_counter = {}

    with open(file) as f:
        dialogues = json.load(f)

        # integrate user utterance and system response into vocab
        for dialogue in dialogues:
            if (args['all_vocab'] or dataset == 'train') and training:
                for turn in dialogue['dialogue']:
                    vocab.index_words(turn['system_transcript'], 'utter')
                    vocab.index_words(turn['transcript'], 'utter')

        # determine training data ratio, default is 100%
        if training and dataset == 'train' and args['data_ratio'] != 100:
            random.Random(10).shuffle(dialogues)
            dialogues = dialogues[:int(len(dialogues) * 0.01 * args['data_ratio'])]

        cnt_lin = 1
        for dialogue in dialogues:
            dialogue_history = ''

            # Filtering and counting domains
            for domain in dialogue['domains']:
                if domain not in EXPERIMENT_DOMAINS:
                    continue
                if domain not in domain_counter.keys():
                    domain_counter[domain] = 0
                domain_counter[domain] += 1

            # Unseen domain setting
            if args['only_domain'] != '' and args['only_domain'] not in dialogue['domains']:
                continue
            if (
                args['except_domain'] != '' and dataset == 'test' and args['except_domain'] not in dialogue['domains']
            ) or (
                args['except_domain'] != '' and dataset != 'test' and [args['except_domain']] == dialogue['domains']
            ):
                continue

            # Reading data
            for turn in dialogue['dialogue']:
                turn_domain = turn['domain']
                turn_id = turn['turn_idx']
                turn_uttr = turn['system_transcript'] + ' ; ' + turn['transcript']
                turn_uttr_strip = turn_uttr.strip()
                dialogue_history += turn['system_transcript'] + ' ; ' + turn['transcript'] + ' ; '
                source_text = dialogue_history.strip()
                turn_belief_dict = fix_label_error(turn['belief_state'], False, slots)

                # Generate domain-dependent slot list
                slot_temp = slots
                if dataset == 'train' or dataset == 'dev':
                    if args['except_domain'] != '':
                        slot_temp = [k for k in slots if args['except_domain'] not in k]
                        turn_belief_dict = OrderedDict([(k, v) for k, v in turn_belief_dict.items() if args['except_domain'] not in k])
                    elif args['only_domain'] != '':
                        slot_temp = [k for k in slots if args['only_domain'] in k]
                        turn_belief_dict = OrderedDict([(k, v) for k, v in turn_belief_dict.items() if args['only_domain'] in k])
                else:
                    if args['except_domain'] != '':
                        slot_temp = [k for k in slots if args['except_domain'] in k]
                        turn_belief_dict = OrderedDict([(k, v) for k, v in turn_belief_dict.items() if args['except_domain'] in k])
                    elif args['only_domain'] != '':
                        slot_temp = [k for k in slots if args['only_domain'] in k]
                        turn_belief_dict = OrderedDict([(k, v) for k, v in turn_belief_dict.items() if args['only_domain'] in k])

                turn_belief_list = [str(k) + '-' + str(v) for k, v in turn_belief_dict.items()]

                if (args['all_vocab'] or dataset == 'train') and training:
                    mem_vocab.index_words(turn_belief_dict, 'belief')

                '''
                generate_y is the list of slot values (dontcare / none / actual value), one per slot.
                gating_label is the gate type of each slot value (0/1/2): dontcare / none / ptr.
                '''
                generate_y, gating_label = [], []
                # class_label, generate_y, slot_mask, gating_label = [], [], [], []
                # start_ptr_label, end_ptr_label = [], []
                for slot in slot_temp:
                    if slot in turn_belief_dict.keys():
                        generate_y.append(turn_belief_dict[slot])
                        if turn_belief_dict[slot] == 'dontcare':
                            gating_label.append(SLOT_GATE_DICT['dontcare'])
                        elif turn_belief_dict[slot] == 'none':
                            gating_label.append(SLOT_GATE_DICT['none'])
                        else:
                            gating_label.append(SLOT_GATE_DICT['ptr'])
                        if max_value_len < len(turn_belief_dict[slot]):
                            max_value_len = len(turn_belief_dict[slot])
                    else:
                        generate_y.append('none')
                        gating_label.append(SLOT_GATE_DICT['none'])

                data_detail = {
                    'ID': dialogue['dialogue_idx'],
                    'domains': dialogue['domains'],
                    'turn_domain': turn_domain,
                    'turn_id': turn_id,
                    'dialog_history': source_text,
                    'turn_belief': turn_belief_list,
                    'gating_label': gating_label,
                    'turn_uttr': turn_uttr_strip,
                    'generate_y': generate_y
                }
                data.append(data_detail)

                if max_resp_len < len(source_text.split()):
                    max_resp_len = len(source_text.split())

            cnt_lin += 1
            if max_line and cnt_lin >= max_line:
                break

    # add t{} to the lang file
    if "t{}".format(max_value_len - 1) not in mem_vocab.word2index.keys() and training:
        for time_i in range(max_value_len):
            mem_vocab.index_words("t{}".format(time_i), 'utter')

    iprint('domain_counter' + str(domain_counter))

    '''
    data -> a list of data_detail dicts, one per turn
    max_resp_len -> the longest utterance + system_response length (word count) over all turns
    slot_temp -> the filtered SLOTS, a list of slot-name strings
    '''
    return data, max_resp_len, slot_temp
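'''
For illustration, one data_detail record roughly looks like the following
(the values are made up; the keys are exactly the ones built above):

    {
        'ID': 'PMUL1635.json',        # dialogue_idx from the raw file
        'domains': ['hotel'],
        'turn_domain': 'hotel',
        'turn_id': 0,
        'dialog_history': '; i need a cheap hotel in the north ;',
        'turn_belief': ['hotel-pricerange-cheap', 'hotel-area-north'],
        'gating_label': [...],        # one gate id per slot in slot_temp
        'turn_uttr': '; i need a cheap hotel in the north',
        'generate_y': [...]           # one value per slot: 'cheap', 'north', 'none', ...
    }
'''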
def train_model(model, device, dataloaders, slots_dict, criterion_ptr, criterion_gate, optimizer, scheduler, clip, num_epochs, print_iter, patience):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_joint_acc = 0.0
    patience_counter = 0

    # Statistics
    results = {
        'train': {
            'loss_ptr': [],
            'loss_gate': [],
            'joint_acc': [],
            'turn_acc': [],
            'f1': []
        },
        'val': {
            'loss_ptr': [],
            'loss_gate': [],
            'joint_acc': [],
            'turn_acc': [],
            'f1': []
        }
    }

    for n_epoch in range(num_epochs):
        print('Epoch {}'.format(n_epoch))
        print('=' * 20)

        for phase in ['train', 'val']:
            print('Phase [{}]'.format(phase))
            print('-' * 10)

            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            predictions = {}
            dataloader = dataloaders[phase]

            for iteration, data in enumerate(dataloader):
                data['context'] = data['context'].to(device=device)
                data['generate_y'] = data['generate_y'].to(device=device)
                data['y_lengths'] = data['y_lengths'].to(device=device)
                data['turn_domain'] = data['turn_domain'].to(device=device)
                data['gating_label'] = data['gating_label'].to(device=device)

                # zero the parameter gradients
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    all_point_outputs, all_gate_outputs, words_point_out = model(
                        data=data, slots_type=phase)

                    # logits = all_point_outputs.transpose(0, 1).transpose(1, 3).transpose(2, 3).contiguous()
                    # targets = data["generate_y"].contiguous()
                    # loss_ptr = criterion_ptr(logits, targets)
                    loss_ptr = masked_cross_entropy_for_value(
                        all_point_outputs.transpose(0, 1).contiguous(),
                        data["generate_y"].contiguous(),  # [:,:len(self.point_slots)].contiguous(),
                        data["y_lengths"])
                    logits_gate = all_gate_outputs.transpose(1, 2).contiguous()
                    targets_gate = data["gating_label"].t().contiguous()
                    loss_gate = criterion_gate(logits_gate, targets_gate)
                    loss = loss_ptr + loss_gate

                    if phase == 'train' and iteration % print_iter == 0:
                        print('Iteration {}: loss_ptr = {:4f}, loss_gate = {:4f}'.format(
                            iteration, loss_ptr, loss_gate))

                    if phase == 'train':
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
                        optimizer.step()

                accumulate_result(predictions, data, slots_dict[phase], all_gate_outputs, words_point_out)

            # Calculate evaluation metrics and save statistics to file
            joint_acc_score_ptr, F1_score_ptr, turn_acc_score_ptr = evaluate_metrics(
                predictions, "pred_bs_ptr", slots_dict[phase])

            results[phase]['loss_ptr'].append(loss_ptr.item())
            results[phase]['loss_gate'].append(loss_gate.item())
            results[phase]['joint_acc'].append(joint_acc_score_ptr)
            results[phase]['turn_acc'].append(turn_acc_score_ptr)
            results[phase]['f1'].append(F1_score_ptr)

            print("Joint Acc: {:.4f}".format(joint_acc_score_ptr))
            print("Turn Acc: {:.4f}".format(turn_acc_score_ptr))
            print("Joint F1: {:.4f}".format(F1_score_ptr))

            pickle.dump(results, open(os.path.join(saving_dir, 'results.p'), 'wb'))

            # deep copy the model
            if phase == 'val':
                scheduler.step(joint_acc_score_ptr)
                if joint_acc_score_ptr > best_joint_acc:
                    patience_counter = 0
                    best_joint_acc = joint_acc_score_ptr
                    best_model_wts = copy.deepcopy(model.state_dict())
                    model_save_path = os.path.join(
                        saving_dir, 'model-joint_acc-{:.4f}.pt'.format(best_joint_acc))
                    torch.save(model, model_save_path)
                else:
                    # no improvement on the validation joint accuracy
                    patience_counter += 1

        if patience_counter == patience:
            iprint('Early stop at epoch {}'.format(n_epoch))
            break

        print()

    time_elapsed = time.time() - since
    iprint('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    iprint('Best validation joint accuracy: {:4f}'.format(best_joint_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
        F1 = 2 * precision * recall / float(precision + recall) if (precision + recall) != 0 else 0
    else:
        if len(pred) == 0:
            precision, recall, F1, count = 1, 1, 1, 1
        else:
            precision, recall, F1, count = 0, 0, 0, 1
    return F1, recall, precision, count


if __name__ == '__main__':
    args = get_args()

    # Make only the selected GPU visible, so cuda:0 always refers to it
    os.environ["CUDA_VISIBLE_DEVICES"] = args['cuda']
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    iprint('Using device = {}'.format(device))

    (train_dataloader, dev_dataloader, test_dataloader, test_special,
     vocabs, slots_dict, max_word) = get_all_data(args=args, training=True, batch_size=args['batch'])

    model = Trade(args=args, device=device, slots_dict=slots_dict, vocabs=vocabs)
    model = model.to(device=device)

    criterion_ptr = nn.CrossEntropyLoss(ignore_index=PAD_TOKEN)
    criterion_gate = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['learning_rate'])
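    # A hedged continuation sketch (assumption: the script goes on to build an
    # LR scheduler and launch training; the hyper-parameter values below are
    # illustrative, and train_model's scheduler.step(joint_acc) call suggests
    # ReduceLROnPlateau in 'max' mode):
    #
    #   scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #       optimizer, mode='max', factor=0.5, patience=1)
    #   model = train_model(model, device,
    #                       {'train': train_dataloader, 'val': dev_dataloader},
    #                       slots_dict, criterion_ptr, criterion_gate,
    #                       optimizer, scheduler,
    #                       clip=args['clip'], num_epochs=args['epoch'],
    #                       print_iter=args['print_iter'], patience=args['patience'])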
def foo(): iprint("inside foo") bar()
def main(): iprint("hello") def foo(): iprint("inside foo") bar() def bar(): iprint("inside bar") foo() bar() a = [1, 2, 3, 4, 5, 6] iprint(a) b = [i for i in range(1000)] iprint(b) watch = StopWatch() log1 = NameLog() log2 = NameLog() log1.track("test1", "test2") log2.track("accuracy") test1 = 4 def baz(log1, log2): test2 = 42 log1.record() for accuracy in range(10): log2.record() baz(log1, log2) print(log1.tracked) print(log2.tracked) @CodeMemo def longjob(t, ret): print("Sleeping...") sleep(t) print("Done.") return ret def worker(): return longjob(5, ret=4) watch.start() res = worker() watch.stop() iprint(f"Got {res}") iprint(f"Elapsed {watch.elapsed()}") exit() tagger = Tagger() print("Basic") print(f"# possible tags: {tagger.size():,}") for i in range(5): print(tagger.make()) print() tagger = Tagger() seen = set() tag = tagger.make() while tag not in seen: seen.add(tag) tag = tagger.make() assert len(seen) == tagger.size() print("Space size matches.") print() tagger = Tagger(10) print("Numeric 10") print(f"# possible tags: {tagger.size():,}") for i in range(5): print(tagger.make()) print() tagger = Tagger("aaa") print("Letters 3") print(f"# possible tags: {tagger.size():,}") for i in range(5): print(tagger.make()) print()
def bar(): iprint("inside bar")
def gridSearch(self, gridParams, dumpOutputBest=True, plots=("score",)):
    """
    Perform parameter optimization to find the best algorithm for a given graph
    partitioning problem instance.

    :param gridParams: an iterable of parameter dictionaries to evaluate.
    :param dumpOutputBest: whether to write the result of the best parameter set to a .txt file.
    :param plots: a list of metrics to plot to compare the algorithms.
    :return: (bestParams, bestMetrics, bestOutput); the best output is also saved to the
             filesystem if dumpOutputBest is True.
    """
    allMetrics = []
    bestOutput = None
    bestMetrics = {"n_ratio_cut": float("inf"), "score": float("inf")}
    bestParams = None
    allClusterSizes = []

    iprint("\nPerforming grid search on {} "
           "...\n=======================================".format(self.graphName))

    for i, params in enumerate(gridParams):
        iprint("\nAlgorithm {} with params = {}:\n"
               "-------------------------------------------------------\n".format(i + 1, params))
        output = self.solver.make_clusters(params)
        metrics, nVerticesClusters = self.evaluate(output)
        if metrics["score"] < bestMetrics["score"]:
            bestOutput = output
            bestMetrics = metrics
            bestParams = params
        print(metrics)
        allMetrics.append(metrics)
        allClusterSizes.append(nVerticesClusters)

    print("\nEnd of gridsearch: best parameters were {} with "
          "metrics = {}".format(bestParams, bestMetrics))

    if dumpOutputBest is True:
        self.solver.dumpOutput(self.graphName, bestOutput)

    if plots is None or len(plots) == 0:
        return bestParams, bestMetrics, bestOutput
    else:
        if "score" in plots:
            y = [m["score"] for m in allMetrics]
            self.barPlot(y, gridParams, "Score")
        if "n_ratio_cut" in plots:
            y = [m["n_ratio_cut"] for m in allMetrics]
            self.barPlot(y, gridParams, "Normalized-Ratio-Cut")
        if "expansion" in plots:
            y = [m["expansion"] for m in allMetrics]
            self.barPlot(y, gridParams, "Expansion")
        if "bindex" in plots:
            y = [m["bindex"] for m in allMetrics]
            self.barPlot(y, gridParams, "Balance index")
        if "max_C_size" in plots:
            y = [m["max_C_size"] for m in allMetrics]
            self.barPlot(y, gridParams, "Maximum cluster size")
        if "min_C_size" in plots:
            y = [m["min_C_size"] for m in allMetrics]
            self.barPlot(y, gridParams, "Minimum cluster size")
        if "var_C_size" in plots:
            y = [m["var_C_size"] for m in allMetrics]
            self.barPlot(y, gridParams, "Variance of cluster size")
        if "box_plot" in plots:
            self.boxPlot(allClusterSizes)

        return bestParams, bestMetrics, bestOutput
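# A minimal usage sketch (the instance name `evaluator` and the parameter
# values are hypothetical; the keys mirror those read in make_clusters above):
#
#   grid = [
#       {"L": "unnormalized", "eigen_norm": False, "tol": 1e-6},
#       {"L": "normalized",   "eigen_norm": True,  "tol": 1e-6},
#   ]
#   bestParams, bestMetrics, bestOutput = evaluator.gridSearch(
#       grid, dumpOutputBest=True, plots=("score", "n_ratio_cut"))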