def run():
    """Find the two box ids that differ at exactly one position.

    Scans columns from the rightmost leftwards; ids with that column
    removed collide in a dict exactly when two ids differ only there.
    """
    start_time = util.now()
    answer = None
    with open(os.path.join(INPUT_DIR, 'input02.dat'), 'r') as f:
        box_ids = [line.strip() for line in f.readlines()]
    for position in reversed(range(len(box_ids[0]))):
        seen = {}
        for box_id in box_ids:
            # Drop the character at `position` via slicing.
            masked = box_id[:position] + box_id[position + 1:]
            if masked in seen:
                answer = masked, box_id, seen[masked], position + 1
                break
            seen[masked] = box_id
        if answer:
            break
    print('answer is: %s (from %s and %s by masking position %d)' % answer)
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def wrapper(*args, **kwargs):
    """Time a call to the wrapped ``func`` and log slow calls.

    Logs a warning when the call takes longer than 500 ms; always
    returns the wrapped function's result unchanged.
    """
    t = now()
    result = func(*args, **kwargs)
    ms = take_ms(t)
    if ms > 500:
        # BUG FIX: Logger.warn() is a deprecated alias; warning() is the
        # supported API and avoids a DeprecationWarning.
        logger.warning('Execute [{}] takes {}ms'.format(func.__name__, ms))
    return result
def print_and_save_metric(model, sampled_data, criterion, epoch, max_num_epochs,
                          data_type, batch_size, use_gpu, abbvid_2_typeid):
    """Evaluate the model, print classification reports, and return metrics.

    Returns a dict holding the sklearn report dict, average precision,
    loss, and the raw true/predicted labels and probabilities.
    """
    (all_true_y, all_pred_y, all_pred_p, loss, type_loss, type_true_y,
     type_pred_y) = eval(model, sampled_data, criterion, batch_size, use_gpu,
                         abbvid_2_typeid)
    report_dict = classification_report(all_true_y, all_pred_y,
                                        output_dict=True)
    ap = average_precision_score(all_true_y, all_pred_p)
    print(('{} Epoch {}/{}: {} loss: {};'.format(now(), epoch + 1,
                                                 max_num_epochs, data_type,
                                                 loss)))
    print(classification_report(all_true_y, all_pred_y, output_dict=False))
    print('{}, Average Precision:{}'.format(data_type, ap))
    if abbvid_2_typeid is not None:
        # Auxiliary typing task: report its loss and per-class quality.
        print('typing loss', type_loss)
        print(classification_report(type_true_y, type_pred_y,
                                    output_dict=False))
    return {
        'metric_dict': report_dict,
        'ap': ap,
        'loss': loss,
        'all_true_y': all_true_y,
        'all_pred_y': all_pred_y,
        'all_pred_p': all_pred_p,
    }
def wrapper(*args, **kwarg):
    """File-based cache around ``func``, keyed by the md5 of the ``url`` kwarg.

    Cache layout: CACHE_DIR/<ISO date>/<domain>/<md5(url)>. On a miss the
    wrapped function is called and, on success, its result is written to
    the cache; on a hit the cached text is returned.
    """
    cache_dir = pathlib.Path(CACHE_DIR)
    # NOTE(review): assumes the 'url' kwarg is always present and wraps a
    # string in `.value` — confirm against callers.
    url = kwarg.get('url').value
    date = datetime.datetime.now().date().isoformat()
    if url:
        items = url.split('/')
        domain = items[2]  # scheme://host/... -> host
        cache_dir = cache_dir / date / domain
        if not cache_dir.exists():
            cache_dir.mkdir(parents=True, exist_ok=True)
    t = now()
    h = hashlib.md5()
    h.update(url.encode())
    cache_file = cache_dir / h.hexdigest()
    if not cache_file.exists():
        ok, result = func(*args, **kwarg)
        if ok:
            with open(cache_file, 'w', encoding='utf8') as fp:
                fp.write(result)
            logger.info(
                'Save to cache, takes {}ms, url: {}, file: {}, size: {}'
                .format(take_ms(t), url, cache_file, len(result)))
    else:
        logger.info('Read from cache: {}ms, {},{}'.format(
            take_ms(t), str(cache_file), url))
        with open(cache_file, encoding='utf8') as fp:
            # BUG FIX: was '\n'.join(fp.readlines()), which doubled every
            # newline because readlines() keeps the trailing '\n' on each
            # line. read() returns the cached text exactly as written.
            result = fp.read()
    return result
def run():
    """Count the square inches of fabric claimed by two or more squares."""
    start_time = util.now()
    with open(os.path.join(INPUT_DIR, 'input03.dat'), 'r') as f:
        all_squares = [_define_square(l) for l in f.readlines() if l.strip()]
    # Tally claims per coordinate instead of materialising a full grid.
    claims = {}
    for square in all_squares:
        for row in range(square.y, square.y + square.height):
            for col in range(square.x, square.x + square.width):
                cell = (row, col)
                claims[cell] = claims.get(cell, 0) + 1
    answer = sum(1 for count in claims.values() if count > 1)
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Sum all frequency changes, starting from zero."""
    start_time = util.now()
    answer = 0
    with open(os.path.join(INPUT_DIR, 'input01.dat'), 'r') as f:
        answer = sum(int(line) for line in f.readlines())
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Manhattan distance from the puzzle square to the spiral origin."""
    start_time = util.now()
    puzzle_input = 289326
    grid = Grid()
    x, y = grid.find_position(puzzle_input)
    # Taxicab distance back to the centre of the spiral.
    answer = abs(x) + abs(y)
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Sum the quotient of the evenly-divisible pair from each row."""
    start_time = util.now()
    answer = 0
    input_file = join(dirname(abspath(__file__)), 'inputs', 'input02.dat')
    with open(input_file, 'r') as f:
        for line in f:
            row_values = [int(token) for token in line.split()]
            for numerator, denominator in evenly_divisible(row_values):
                answer += int(numerator / denominator)
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Sum every digit that matches the next digit, wrapping around."""
    start_time = util.now()
    with open(INPUT, 'r') as f:
        digits = f.read().strip()
    length = len(digits)
    total = 0
    for i, digit in enumerate(digits):
        # Modulus wrap makes the last digit compare against the first.
        if digit == digits[(i + 1) % length]:
            total += int(digit)
    answer = total
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def get_messages(groups, mask, isimg, gmtype, label, expired, pos, nums,
                 ismanager, ap_groups=''):
    """Fetch messages from MySQL.

    Filters: groups, mask, isimg, gmtype, label, expired, ismanager,
    ap_groups. Paging: pos (start offset) and nums (page size).
    """
    if expired:
        # Any truthy `expired` flag is replaced with today's date string.
        expired = util.now('%Y-%m-%d')
    return mysql.get_messages(groups, mask, isimg, gmtype, label, expired,
                              pos, nums, ismanager, ap_groups)
def run():
    """Sum per-row checksums (max - min) for the spreadsheet."""
    start_time, answer = util.now(), 0
    input_file = join(dirname(abspath(__file__)), 'inputs', 'input02.dat')

    def checksum(row_data):
        """Return max - min of the row, or 0 for an empty row."""
        # BUG FIX: was `if any(row_data)`, which is also False for a
        # non-empty all-zero row; the intended guard is against an
        # *empty* row (output happens to be identical — max-min of
        # identical zeros is 0 — but the guard now states its intent
        # and cannot diverge if the formula changes).
        return max(row_data) - min(row_data) if row_data else 0

    with open(input_file, 'r') as f:
        for row in f:
            answer += checksum([int(d) for d in row.split()])
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Total fuel for all modules, including fuel needed for the fuel."""
    start_time = util.now()
    answer = 0
    with open(os.path.join(INPUT_DIR, 'input01.dat'), 'r') as f:
        for line in f.readlines():
            # Keep adding fuel-for-fuel until the requirement bottoms out.
            requirement = fuel_needed(int(line))
            while requirement > 0:
                answer += requirement
                requirement = fuel_needed(requirement)
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Find two entries summing to 2020 and report them plus their product."""
    start_time, answer = util.now(), None
    with open(os.path.join(INPUT_DIR, 'input01.txt'), 'r') as f:
        values = sorted(int(v) for v in f.readlines())
    for x in values:
        # Scan candidates >= x from the largest downwards.
        candidates = [n for n in values if n >= x]
        for y in reversed(candidates):
            if x + y == 2020:
                answer = (x, y, x * y)
        if answer:
            break
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Count password entries where exactly one indexed letter matches."""
    start_time = util.now()
    with open(os.path.join(INPUT_DIR, 'input02.txt'), 'r') as f:
        lines = f.readlines()

    def _is_valid(entry):
        """True when exactly one of the two 1-based positions holds the letter."""
        parts = SPLITS_PATTERN.split(entry.strip())
        first = int(parts[0]) - 1
        second = int(parts[1]) - 1
        required = parts[2]
        given = parts[3]
        hits = sum(1 for i in (first, second) if given[i] == required)
        return hits == 1

    answer = sum(1 for entry in lines if _is_valid(entry))
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Find the first cumulative frequency that is reached twice."""
    start_time = util.now()
    frequency, answer, changes = 0, 0, 0
    seen = set()
    with open(os.path.join(INPUT_DIR, 'input01.dat'), 'r') as f:
        deltas = [int(v) for v in f.readlines()]
    # Cycle the delta list forever until a frequency repeats.
    for delta in repeating_iterator(deltas):
        frequency += int(delta)
        changes += 1
        if frequency in seen:
            answer = frequency
            break
        seen.add(frequency)
    print('answer is: %s (found after %d changes)' % (str(answer), changes))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Find three distinct entries summing to 2020 and report their product."""
    start_time, answer = util.now(), None
    with open(os.path.join(INPUT_DIR, 'input01.txt'), 'r') as f:
        values = sorted([int(v) for v in f.readlines()])

    def _get_answer(values):
        """Return (x, y, z, x*y*z) for the first distinct triple summing
        to 2020, or None when no such triple exists."""
        values_length = len(values)
        for i in range(0, values_length):
            for j in range(i + 1, values_length):
                # BUG FIX: was range(i + 2, ...), which allows k <= j —
                # the same element could be used twice (k == j) and index
                # pairs were revisited. k must start just past j.
                for k in range(j + 1, values_length):
                    x, y, z = values[i], values[j], values[k]
                    if x + y + z == 2020:
                        return (x, y, z, x * y * z)
        return None

    print('answer is: %s' % str(_get_answer(values)))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Count trees hit descending the slope: right 3, down 1."""
    start_time = util.now()
    with open(os.path.join(INPUT_DIR, 'input03.txt'), 'r') as f:
        grid = [line.strip() for line in f.readlines()]
    repeating_width = len(grid[0])
    answer = 0
    for y, row in enumerate(grid):
        # x advances 3 per row; the pattern repeats horizontally, so
        # wrap the column with modulus math.
        if row[(y * 3) % repeating_width] == TREE_SYMBOL:
            answer += 1
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def run():
    """Checksum: (ids with a letter exactly twice) * (ids with one thrice)."""
    start_time = util.now()
    twos, threes = 0, 0
    with open(os.path.join(INPUT_DIR, 'input02.dat'), 'r') as f:
        for line in f.readlines():
            entry = line.strip()
            counts = {}
            for letter in entry:
                counts[letter] = counts.get(letter, 0) + 1
            # Each id contributes at most once per bucket, regardless of
            # how many letters hit the count.
            tallies = set(counts.values())
            if 2 in tallies:
                twos += 1
            if 3 in tallies:
                threes += 1
    answer = twos * threes
    print('answer is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def wrapper(*args, **kwarg):
    """Elasticsearch-backed cache around ``func``, keyed by the ``url`` kwarg.

    Returns the cached content on a hit; on a miss calls the wrapped
    function and stores the result when it reports success. Returns
    None when the fresh call fails.
    """
    url = kwarg.get('url').value
    start = now()
    es = EsClient.instance()
    content = None
    if es.exists(url):
        # Cache hit: serve straight from the index.
        content = es.get(url)
        logger.info('Read from es [{}] cache takes {}ms, {}'.format(
            es.get_index(), take_ms(start), url))
    else:
        ok, result = func(*args, **kwarg)
        if ok:
            content = result
            EsClient.instance().save(url, result)
            logger.info(
                'Save to es cache, takes {}ms, size: {}, url: {}'.format(
                    take_ms(start), len(result), url))
    return content
def run():
    """Polymer reduction: part A reduces fully, part B also strips one unit type."""
    start_time = util.now()
    answer = {'a': {}, 'b': {}}
    with open(os.path.join(INPUT_DIR, 'input05.dat'), 'r') as f:
        initial_material = f.read().strip()

    # part A
    reduced_material = _reduce(initial_material)
    answer['a']['length'] = len(reduced_material)
    answer['a']['material'] = ''.join(reduced_material)
    print('Part One answer is: %d' % answer['a']['length'])

    # part B
    attempts = []
    for letter in string.ascii_lowercase:
        print('reducing less %s' % letter)
        candidate = _reduce(_strip_unit_type(letter, initial_material))
        attempts.append({'unit_type': letter, 'size': len(candidate)})
    # min() returns the first minimum, matching stable-sort-then-[0].
    best = min(attempts, key=lambda d: d['size'])
    answer['b'].update(best)
    print('Part Two answer is: %d' % answer['b']['size'])

    # summary
    print('whole answer dict is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))
def train(model, opt):
    """Train `model` for opt.max_num_epochs epochs with periodic evaluation.

    Each epoch resamples the training data, minimises a weighted sum of
    the relation loss, an auxiliary type loss, and a type-match KL loss,
    then evaluates on the sampled validation set. Whenever validation
    f1 (class '1') improves, the model is evaluated on the test set and
    checkpointed (unless in debug mode).
    """
    print(model)
    # Debug mode caps every split at 1000 examples for fast iteration.
    data_limit = 1000 if opt.debug_mode else None
    # use_tensor_board = False
    opt.result_dir.mkdir(parents=True, exist_ok=True)
    opt.checkpoints_dir.mkdir(parents=True, exist_ok=True)
    np.random.seed(0)
    test_data = load_data(opt.feature_dir, 'test', limit=data_limit)
    # manual_data = load_data(opt.feature_dir, 'manual', limit=data_limit)
    abbvid_2_typeid = None
    if opt.use_abbv_type:
        abbvid_2_typeid = load_abbvid_2_typeid(opt.abbv_type_file)
    # if opt.use_pre_sample:
    #     sample_valid_data = sample_data(None, negative_ratio=opt.negative_ratio,
    #         saved_feature_file=(opt.feature_dir / 'valid/sample1_bags_feature.json'))
    # else:
    train_data = load_data(opt.feature_dir, 'train', limit=data_limit)
    valid_data = load_data(opt.feature_dir, 'valid', limit=data_limit)
    sample_valid_data = sample_data(valid_data,
                                    negative_ratio=opt.negative_ratio)
    if not opt.debug_mode:
        # Full runs evaluate against a validation sample drawn with the
        # test-time negative ratio; debug runs reuse the training-ratio
        # sample to save time.
        valid_data = load_data(opt.feature_dir, 'valid', limit=data_limit)
        # sample_valid_data = sample_data(valid_data, negative_ratio=opt.negative_ratio)
        sample_all_valid_data = sample_data(
            valid_data, negative_ratio=opt.test_negative_ratio)
    else:
        sample_all_valid_data = sample_valid_data
    sample_test_data = sample_data(test_data,
                                   negative_ratio=opt.test_negative_ratio)
    # sample_manual_data = sample_data(manual_data, negative_ratio=None)
    criterion = nn.CrossEntropyLoss()
    kl_criterion = nn.KLDivLoss()
    all_parameters = model.parameters()
    optimizer = optim.Adam(all_parameters, lr=opt.learning_rate)
    optimizer.zero_grad()
    # train
    eval_batch_size = 128
    previous_valid_f1 = -1  # best validation f1 seen so far
    best_epoch = 1
    max_num_epochs = opt.max_num_epochs
    global_step = 0
    for epoch in range(max_num_epochs):
        total_losses = []
        # if opt.use_pre_sample:
        #     sample_train_data = sample_data(None, negative_ratio=opt.negative_ratio,
        #         saved_feature_file=(opt.feature_dir / 'train/sample{}_bags_feature.json'.format(epoch + 1)))
        # else:
        # Fresh negative sample of the training data every epoch.
        sample_train_data = sample_data(train_data,
                                        negative_ratio=opt.negative_ratio)
        g = get_batches(sample_train_data, opt.batch_size)
        for batch_data in tqdm(g, total=int(
                len(sample_train_data) / opt.batch_size)):
            batch_data = model.prepare_data(batch_data,
                                            use_gpu=opt.use_gpu,
                                            abbvid_2_typeid=abbvid_2_typeid)
            optimizer.zero_grad()
            out, type_out = model.get_predictions(batch_data)
            batch_labels = batch_data['y']
            batch_type_labels = batch_data['type_labels']
            if batch_type_labels is not None:
                # Auxiliary typing loss over both mention slots.
                # NOTE(review): assumes type_out is (batch, 2, num_types)
                # and batch_type_labels is (batch, 2) — confirm against
                # model.get_predictions.
                loss_type = criterion(
                    type_out[:, 0, :], batch_type_labels[:, 0]) + criterion(
                        type_out[:, 1, :], batch_type_labels[:, 1])
                # Encourage the two slots' type distributions to agree on
                # positive pairs: mask by the relation label, then KL
                # between log-softmax of slot 0 and softmax of slot 1.
                match_mask = batch_labels
                match_mask = match_mask.unsqueeze(1).expand_as(
                    type_out[:, 0, :]).float()
                t1 = type_out[:, 0, :] * match_mask
                t2 = type_out[:, 1, :] * match_mask
                t1 = torch.log_softmax(t1, 1)
                t2 = torch.softmax(t2, 1)
                loss_type_match = kl_criterion(t1, t2)
            else:
                loss_type = 0
                loss_type_match = 0
            loss_re = criterion(out, batch_labels)  # avg loss of the batch
            # Weighted combination of the three objectives.
            loss = opt.lambda_re * loss_re + opt.lambda_type * loss_type \
                + opt.lambda_type_match * loss_type_match
            loss.backward()
            optimizer.step()
            total_losses.append(loss.item())
            global_step += 1
            if global_step % opt.step_print_train_loss == 0:
                # Running average over all batches seen this epoch.
                avg_loss = torch.mean(torch.Tensor(total_losses))
                print(('{} Epoch {}/{}: train loss: {};'.format(
                    now(), epoch + 1, max_num_epochs, avg_loss)))
        avg_loss = torch.mean(torch.Tensor(total_losses))
        print(
            ('{} Epoch {}/{}: train loss: {};'.format(now(), epoch + 1,
                                                      max_num_epochs,
                                                      avg_loss)))
        # res = print_and_save_metric(model, sample_valid_data, criterion, epoch, max_num_epochs, 'valid')
        res = print_and_save_metric(model, sample_all_valid_data, criterion,
                                    epoch, max_num_epochs, 'valid_all',
                                    eval_batch_size, opt.use_gpu,
                                    abbvid_2_typeid)
        if previous_valid_f1 < res['metric_dict']['1']['f1-score']:
            # Validation improved: evaluate on test and checkpoint.
            previous_valid_f1 = res['metric_dict']['1']['f1-score']
            res = print_and_save_metric(model, sample_test_data, criterion,
                                        epoch, max_num_epochs, 'test',
                                        eval_batch_size, opt.use_gpu,
                                        abbvid_2_typeid)
            all_pre, all_rec, thresholds = precision_recall_curve(
                res['all_true_y'], res['all_pred_p'])
            best_epoch = epoch + 1
            print("save models, best epoch: epoch {};".format(best_epoch))
            if not opt.debug_mode:
                save_pr(str(opt.result_dir), model.model_name, epoch,
                        all_pre, all_rec)
                model.save(opt.checkpoints_dir /
                           '{}_{}.pth'.format(opt.print_opt, str(epoch + 1)))
            # _ = print_and_save_metric(model, sample_manual_data, criterion, epoch, max_num_epochs, 'manual',
            #     eval_batch_size, abbvid_2_typeid)
    print('best epoch is epoch {}.'.format(best_epoch))
def _gen_image_id_(self, *args):
    """Derive an image id by hashing the current timestamp with *args."""
    # Renamed local: `now` shadowed a likely module-level helper name.
    timestamp = util.now()
    return util.md5(timestamp, *args).hexdigest()
def run():
    """Advent of Code 2018 day 4: sleepy-guard analysis.

    Part A: the guard asleep the most minutes overall, multiplied by the
    minute that guard is most often asleep. Part B: the guard/minute
    pair with the highest single-minute nap count.
    """
    unsorted_lines, answer = {}, {'a': {}, 'b': {}}
    start_time = util.now()
    # Log lines arrive out of order; key them by timestamp so a sorted
    # walk replays events chronologically.
    with open(os.path.join(INPUT_DIR, 'input04.dat'), 'r') as f:
        for line in f.readlines():
            entry_time, entry = REGEX['event'].search(line).groups()
            unsorted_lines[entry_time] = entry
    # Replay chronologically, building a per-day 60-slot sleep map.
    observations, guard, nap_start, total_sleepage = OrderedDict(), None, 0, {}
    for key in sorted(unsorted_lines.keys()):
        entry = unsorted_lines[key]
        day, hour, minute = re.split('[\s:]', key)
        is_new_series = REGEX['guard'].search(entry)
        if is_new_series:
            # A new guard takes over the shift.
            guard = is_new_series.group(1)
        elif REGEX['sleep'].search(entry):
            nap_start = int(minute)
        elif REGEX['wake'].search(entry):
            if day not in observations:
                observations[day] = {
                    'guard': guard,
                    'is_asleep': [False for i in range(60)]
                }
            # Mark the half-open range [nap_start, wake minute) as asleep.
            for i in range(nap_start, int(minute)):
                observations[day]['is_asleep'][i] = True
    # part A
    # Total minutes asleep per guard across all days.
    for entry in observations.values():
        id = entry['guard']
        total_sleepage[id] = total_sleepage.get(id, 0) + len(
            [y for y in entry['is_asleep'] if y])
    for k, v in total_sleepage.items():
        if v > answer['a'].get('sleepage', 0):
            answer['a'] = {'guard': k, 'sleepage': v}
    # Per-guard histogram: how often each minute 0-59 was slept through.
    histograms = {}
    for observed in observations.values():
        id = observed['guard']
        if id not in histograms:
            histograms[id] = [0 for i in range(60)]
        for minute in [i for i, v in enumerate(observed['is_asleep']) if v]:
            histograms[id][minute] += 1
    # The sleepiest guard's most-slept minute.
    for i, v in enumerate(histograms[answer['a']['guard']]):
        if v > answer['a'].get('max_per_minute', 0):
            answer['a'].update({'worst_minute': i, 'max_per_minute': v})
    print('Part One answer is: %d' %
          (int(answer['a']['guard']) * answer['a']['worst_minute']))

    # part B
    def _most_minute(guard_id, all_histograms):
        """Find the minute a guard naps most, and how many times."""
        ranked = sorted([{
            'minute': i,
            'count': v
        } for i, v in enumerate(all_histograms[guard_id])],
                        key=lambda d: d['count'])
        return ranked[-1]

    # Rank all guards by their single most-slept minute's count; the
    # last entry is the overall winner.
    ranked_likelies = sorted([{
        'guard': k,
        'most': _most_minute(k, histograms)
    } for k in histograms.keys()],
                             key=lambda e: e['most']['count'])
    answer['b'].update(ranked_likelies[-1])
    print('Part Two answer is: %d' %
          (int(answer['b']['guard']) * answer['b']['most']['minute']))
    print('whole answer dict is: %s' % str(answer))
    print(' elapsed: %f seconds' % util.elapsed_since(start_time))