예제 #1
0
def run():
    """Find the two box ids differing in exactly one position.

    For each column (scanned right to left), bucket every word with that
    column removed; a bucket collision means two words differ only there.
    """
    start_time = util.now()
    answer = None

    with open(os.path.join(INPUT_DIR, 'input02.dat'), 'r') as f:
        words = [line.strip() for line in f.readlines()]

    for position in reversed(range(len(words[0]))):
        seen = {}

        for word in words:
            # Drop the character at `position` via slicing.
            masked = word[:position] + word[position + 1:]

            if masked in seen:
                answer = masked, word, seen[masked], position + 1
                break

            seen[masked] = word

        if answer:
            break

    print('answer is: %s (from %s and %s by masking position %d)' % answer)
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #2
0
 def wrapper(*args, **kwargs):
     """Call *func*, logging a warning when it takes longer than 500 ms."""
     t = now()
     result = func(*args, **kwargs)
     ms = take_ms(t)
     if ms > 500:
         # FIX: Logger.warn() is a deprecated alias; warning() is the
         # documented API and avoids a DeprecationWarning.
         logger.warning('Execute [{}] takes {}ms'.format(func.__name__, ms))
     return result
예제 #3
0
def print_and_save_metric(model, sampled_data, criterion, epoch,
                          max_num_epochs, data_type, batch_size, use_gpu,
                          abbvid_2_typeid):
    """Evaluate *model* on *sampled_data*, print the metrics, return them.

    Returns a dict with the classification report, average precision,
    loss, and the raw true/predicted labels and probabilities.
    """
    # NOTE(review): `eval` here is a project helper shadowing the builtin —
    # presumably defined elsewhere in this module.
    (all_true_y, all_pred_y, all_pred_p, loss, type_loss, type_true_y,
     type_pred_y) = eval(model, sampled_data, criterion, batch_size, use_gpu,
                         abbvid_2_typeid)

    metric_dict = classification_report(
        all_true_y, all_pred_y, output_dict=True)
    ap = average_precision_score(all_true_y, all_pred_p)

    print(('{} Epoch {}/{}: {} loss: {};'.format(now(), epoch + 1,
                                                 max_num_epochs, data_type,
                                                 loss)))
    print(classification_report(all_true_y, all_pred_y, output_dict=False))
    print('{}, Average Precision:{}'.format(data_type, ap))

    if abbvid_2_typeid is not None:
        # Also report the auxiliary abbreviation-typing metrics.
        print('typing loss', type_loss)
        print(
            classification_report(type_true_y, type_pred_y, output_dict=False))

    return {
        'metric_dict': metric_dict,
        'ap': ap,
        'loss': loss,
        'all_true_y': all_true_y,
        'all_pred_y': all_pred_y,
        'all_pred_p': all_pred_p
    }
예제 #4
0
    def wrapper(*args, **kwarg):
        """Cache *func*'s string result on disk, keyed by the URL's MD5.

        Cache layout: CACHE_DIR/<iso-date>/<domain>/<md5(url)>.  On a hit
        the cached file is returned; on a miss *func* is called and, if it
        reports success, its result is written through.
        """
        cache_dir = pathlib.Path(CACHE_DIR)
        # assumes kwarg always carries a 'url' object with .value — TODO confirm
        url = kwarg.get('url').value
        date = datetime.datetime.now().date().isoformat()
        if url:
            items = url.split('/')
            domain = items[2]  # scheme://host/... -> host
            cache_dir = cache_dir / date / domain

        if not cache_dir.exists():
            cache_dir.mkdir(parents=True, exist_ok=True)

        t = now()
        h = hashlib.md5()
        h.update(url.encode())
        cache_file = cache_dir / h.hexdigest()
        if not cache_file.exists():
            ok, result = func(*args, **kwarg)
            if ok:
                with open(cache_file, 'w', encoding='utf8') as fp:
                    fp.write(result)
                    logger.info(
                        'Save to cache, takes {}ms, url: {}, file: {}, size: {}'
                        .format(take_ms(t), url, cache_file, len(result)))
        else:
            logger.info('Read from cache: {}ms, {},{}'.format(
                take_ms(t), str(cache_file), url))
            with open(cache_file, encoding='utf8') as fp:
                # FIX: was '\n'.join(fp.readlines()), which doubled every
                # newline — readlines() keeps the trailing '\n' on each line,
                # so the cached copy no longer round-tripped the original.
                result = fp.read()
        return result
예제 #5
0
def run():
    """Count grid cells claimed by more than one square."""
    start_time = util.now()

    with open(os.path.join(INPUT_DIR, 'input03.dat'), 'r') as f:
        all_squares = [_define_square(l) for l in f.readlines() if l.strip()]

    max_x = max(s.x + s.width for s in all_squares)
    max_y = max(s.y + s.height for s in all_squares)

    # One list of claim ids per cell, indexed as grid[y][x].
    grid = [[[] for _ in range(max_x + 1)] for _ in range(max_y + 1)]

    for square in all_squares:
        for row in range(square.y, square.y + square.height):
            for col in range(square.x, square.x + square.width):
                grid[row][col].append(square.id)

    answer = sum(1 for row in grid for cell in row if len(cell) > 1)

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #6
0
def run():
    """Sum every frequency change in the input, starting from zero."""
    start_time = util.now()
    starting_frequency = 0

    with open(os.path.join(INPUT_DIR, 'input01.dat'), 'r') as f:
        deltas = [int(line) for line in f.readlines()]

    answer = starting_frequency + sum(deltas)

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #7
0
def run():
    """Manhattan distance from the puzzle square to the spiral's origin."""
    start_time = util.now()
    puzzle_input = 289326

    x, y = Grid().find_position(puzzle_input)
    answer = abs(x) + abs(y)

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #8
0
def run():
    """Sum the quotient of the evenly-divisible pair in every row."""
    start_time = util.now()
    answer = 0
    input_file = join(dirname(abspath(__file__)), 'inputs', 'input02.dat')

    with open(input_file, 'r') as f:
        for row in f:
            numbers = [int(token) for token in row.split()]
            for x, y in evenly_divisible(numbers):
                answer += int(x / y)

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #9
0
def run():
    """Sum digits matching the next digit, with the sequence wrapped."""
    start_time = util.now()

    with open(INPUT, 'r') as f:
        digits = f.read().strip()

    # Appending the first digit lets i+1 index the circular neighbour.
    wrapped = digits + digits[0]
    answer = sum(int(d) for i, d in enumerate(digits) if d == wrapped[i + 1])

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #10
0
파일: msg.py 프로젝트: hayate-hsu/cms
def get_messages(groups, mask, isimg, gmtype, label, expired, pos, nums, ismanager, ap_groups=''):
    """Fetch messages from mysql.

    Filtering: groups, mask.  Paging: pos (start), nums (per page).
    """
    if expired:
        # Treat any truthy `expired` flag as "expired as of today".
        expired = util.now('%Y-%m-%d')

    return mysql.get_messages(groups, mask, isimg, gmtype, label, expired, pos, nums, ismanager, ap_groups)
예제 #11
0
def run():
    """Sum per-row checksums (max minus min) over the spreadsheet."""
    start_time = util.now()
    answer = 0
    input_file = join(dirname(abspath(__file__)), 'inputs', 'input02.dat')

    def checksum(row_data):
        # FIX: guard on non-emptiness, not any(). `any(row_data)` is also
        # false for an all-zero row (harmless here only because max-min is
        # then 0); the real intent is to avoid max()/min() on an empty row.
        return max(row_data) - min(row_data) if row_data else 0

    with open(input_file, 'r') as f:
        for row in f:
            answer += checksum([int(d) for d in row.split()])

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #12
0
def run():
    """Total fuel requirement, including fuel needed to carry the fuel."""
    start_time = util.now()
    answer = 0

    with open(os.path.join(INPUT_DIR, 'input01.dat'), 'r') as f:
        for line in f.readlines():
            # Iterate until the fuel-for-fuel term drops to zero or below.
            fuel = fuel_needed(int(line))
            while fuel > 0:
                answer += fuel
                fuel = fuel_needed(fuel)

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #13
0
def run():
    """Find the pair of entries summing to 2020 and report their product."""
    start_time = util.now()
    answer = None

    with open(os.path.join(INPUT_DIR, 'input01.txt'), 'r') as f:
        values = sorted([int(v) for v in f.readlines()])

    for x in values:
        # Only candidates >= x need checking; scan them descending.
        for y in reversed([n for n in values if n >= x]):
            if x + y == 2020:
                answer = (x, y, x * y)
                # FIX: stop at the first match — the original kept scanning
                # and could overwrite the answer with a later pair.
                break

        if answer:
            break

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #14
0
def run():
    """Count password entries where exactly one indexed position matches."""
    start_time = util.now()

    with open(os.path.join(INPUT_DIR, 'input02.txt'), 'r') as f:
        lines = f.readlines()

    def _is_valid(entry):
        # Entry format: <pos-a>-<pos-b> <char>: <password> (1-based positions).
        parts = SPLITS_PATTERN.split(entry.strip())
        index_a = int(parts[0]) - 1
        index_b = int(parts[1]) - 1
        required = parts[2]
        given = parts[3]
        hits = [i for i in (index_a, index_b) if given[i] == required]
        return len(hits) == 1

    answer = len([entry for entry in lines if _is_valid(entry)])

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #15
0
def run():
    """Find the first running frequency reached twice while cycling input."""
    start_time = util.now()
    frequency, answer, tried = 0, 0, 0
    already_seen = set()

    with open(os.path.join(INPUT_DIR, 'input01.dat'), 'r') as f:
        all_values = [int(v) for v in f.readlines()]

    for value in repeating_iterator(all_values):
        frequency += int(value)
        tried += 1

        if frequency in already_seen:
            answer = frequency
            break

        already_seen.add(frequency)

    print('answer is: %s (found after %d changes)' % (str(answer), tried))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #16
0
def run():
    """Find three distinct entries summing to 2020 and print their product."""
    start_time = util.now()

    with open(os.path.join(INPUT_DIR, 'input01.txt'), 'r') as f:
        values = sorted([int(v) for v in f.readlines()])

    def _get_answer(values):
        """Return (x, y, z, x*y*z) for the first triple summing to 2020."""
        values_length = len(values)

        for i in range(0, values_length):
            for j in range(i + 1, values_length):
                # FIX: k must start at j + 1.  The original used i + 2,
                # which allows k <= j and therefore reuses the same list
                # element in the triple.
                for k in range(j + 1, values_length):
                    x, y, z = values[i], values[j], values[k]
                    if x + y + z == 2020:
                        return (x, y, z, x * y * z)

        return None

    print('answer is: %s' % str(_get_answer(values)))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #17
0
def run():
    """Count trees hit while descending the slope: right 3, down 1."""
    start_time = util.now()

    with open(os.path.join(INPUT_DIR, 'input03.txt'), 'r') as f:
        grid = [line.strip() for line in f.readlines()]

    repeating_width = len(grid[0])

    def _is_tree(x, y):
        """Uses modulus math to extend the columns infinitely rightward."""
        return grid[y][x % repeating_width] == TREE_SYMBOL

    # After y rows the sled has moved 3*y columns to the right.
    answer = sum(1 for y in range(len(grid)) if _is_tree(3 * y, y))

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #18
0
def run():
    """Checksum: ids containing a letter exactly twice times ids with one thrice."""
    start_time = util.now()
    has_exactly = {2: 0, 3: 0}

    with open(os.path.join(INPUT_DIR, 'input02.dat'), 'r') as f:
        for line in f.readlines():
            entry = line.strip()

            counts = {}
            for letter in entry:
                counts[letter] = counts.get(letter, 0) + 1

            for target in (2, 3):
                # Count the id once per target, however many letters qualify.
                if target in counts.values():
                    has_exactly[target] += 1

    answer = has_exactly[2] * has_exactly[3]

    print('answer is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #19
0
    def wrapper(*args, **kwarg):
        """Serve *func*'s result from the Elasticsearch cache, keyed by URL."""
        url = kwarg.get('url').value

        start = now()
        es = EsClient.instance()
        content = None

        if es.exists(url):
            content = es.get(url)
            logger.info('Read from es [{}] cache takes {}ms, {}'.format(
                es.get_index(), take_ms(start), url))
        else:
            # Cache miss: compute, and write through only on success.
            ok, result = func(*args, **kwarg)
            if ok:
                content = result
                EsClient.instance().save(url, result)
                logger.info(
                    'Save to es cache, takes {}ms, size: {}, url: {}'.format(
                        take_ms(start), len(result), url))

        return content
예제 #20
0
def run():
    """Polymer reduction: part A reduces as-is; part B removes each unit type first."""
    start_time = util.now()
    answer = {'a': {}, 'b': {}}

    with open(os.path.join(INPUT_DIR, 'input05.dat'), 'r') as f:
        initial_material = f.read().strip()

    # part A

    reduced_material = _reduce(initial_material)
    answer['a']['length'] = len(reduced_material)
    answer['a']['material'] = ''.join(reduced_material)
    print('Part One answer is: %d' % answer['a']['length'])

    # part B

    attempts = []
    for letter in string.ascii_lowercase:
        print('reducing less %s' % letter)
        stripped = _strip_unit_type(letter, initial_material)
        attempts.append({
            'unit_type': letter,
            'size': len(_reduce(stripped)),
        })

    # The winning unit type is the one whose removal reduces the furthest.
    best = min(attempts, key=lambda d: d['size'])
    answer['b'].update(best)

    print('Part Two answer is: %d' % answer['b']['size'])

    # summary

    print('whole answer dict is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))
예제 #21
0
def train(model, opt):
    """Train *model* using the settings carried on *opt*.

    Each epoch re-samples training bags, optimizes a combined loss
    (relation cross-entropy + optional abbreviation-type cross-entropy +
    a KL type-match term), evaluates on the sampled validation set, and —
    whenever validation F1 improves — evaluates on test, saves a PR curve
    and a model checkpoint.

    NOTE(review): assumes `opt` exposes dirs, ratios, lambdas, batch and
    epoch settings as attributes — confirm against the option parser.
    """
    print(model)
    # Cap datasets to a small slice when debugging.
    data_limit = 1000 if opt.debug_mode else None
    # use_tensor_board = False

    opt.result_dir.mkdir(parents=True, exist_ok=True)
    opt.checkpoints_dir.mkdir(parents=True, exist_ok=True)

    # Fixed seed so sampling is reproducible across runs.
    np.random.seed(0)

    test_data = load_data(opt.feature_dir, 'test', limit=data_limit)
    # manual_data = load_data(opt.feature_dir, 'manual', limit=data_limit)

    abbvid_2_typeid = None
    if opt.use_abbv_type:
        abbvid_2_typeid = load_abbvid_2_typeid(opt.abbv_type_file)

    # if opt.use_pre_sample:
    #     sample_valid_data = sample_data(None, negative_ratio=opt.negative_ratio,
    #                                     saved_feature_file=(opt.feature_dir / 'valid/sample1_bags_feature.json'))
    # else:
    train_data = load_data(opt.feature_dir, 'train', limit=data_limit)
    valid_data = load_data(opt.feature_dir, 'valid', limit=data_limit)
    sample_valid_data = sample_data(valid_data,
                                    negative_ratio=opt.negative_ratio)

    if not opt.debug_mode:
        # Full runs validate with the test-time negative ratio.
        valid_data = load_data(opt.feature_dir, 'valid', limit=data_limit)
        # sample_valid_data = sample_data(valid_data, negative_ratio=opt.negative_ratio)
        sample_all_valid_data = sample_data(
            valid_data, negative_ratio=opt.test_negative_ratio)
    else:
        sample_all_valid_data = sample_valid_data

    sample_test_data = sample_data(test_data,
                                   negative_ratio=opt.test_negative_ratio)
    # sample_manual_data = sample_data(manual_data, negative_ratio=None)

    criterion = nn.CrossEntropyLoss()
    kl_criterion = nn.KLDivLoss()
    all_parameters = model.parameters()
    optimizer = optim.Adam(all_parameters, lr=opt.learning_rate)
    optimizer.zero_grad()

    # train
    eval_batch_size = 128
    previous_valid_f1 = -1  # best validation F1 seen so far
    best_epoch = 1
    max_num_epochs = opt.max_num_epochs
    global_step = 0

    for epoch in range(max_num_epochs):
        total_losses = []

        # if opt.use_pre_sample:
        #     sample_train_data = sample_data(None, negative_ratio=opt.negative_ratio, saved_feature_file=(
        #             opt.feature_dir / 'train/sample{}_bags_feature.json'.format(epoch + 1)))
        # else:
        # Fresh negative sampling every epoch.
        sample_train_data = sample_data(train_data,
                                        negative_ratio=opt.negative_ratio)

        g = get_batches(sample_train_data, opt.batch_size)
        for batch_data in tqdm(g,
                               total=int(
                                   len(sample_train_data) / opt.batch_size)):
            batch_data = model.prepare_data(batch_data,
                                            use_gpu=opt.use_gpu,
                                            abbvid_2_typeid=abbvid_2_typeid)
            optimizer.zero_grad()
            out, type_out = model.get_predictions(batch_data)

            batch_labels = batch_data['y']
            batch_type_labels = batch_data['type_labels']

            if batch_type_labels is not None:
                # Type loss: cross-entropy for each of the two mention slots.
                loss_type = criterion(
                    type_out[:, 0, :], batch_type_labels[:, 0]) + criterion(
                        type_out[:, 1, :], batch_type_labels[:, 1])
                # Mask type-match term to positive (matching) pairs only.
                match_mask = batch_labels
                match_mask = match_mask.unsqueeze(1).expand_as(
                    type_out[:, 0, :]).float()
                t1 = type_out[:, 0, :] * match_mask
                t2 = type_out[:, 1, :] * match_mask
                t1 = torch.log_softmax(t1, 1)
                t2 = torch.softmax(t2, 1)

                loss_type_match = kl_criterion(t1, t2)
            else:
                loss_type = 0
                loss_type_match = 0

            loss_re = criterion(out, batch_labels)  # avg loss of the batch

            # Weighted sum of the three loss components.
            loss = opt.lambda_re * loss_re + opt.lambda_type * loss_type + opt.lambda_type_match * loss_type_match
            loss.backward()
            optimizer.step()
            total_losses.append(loss.item())
            global_step += 1

            if global_step % opt.step_print_train_loss == 0:
                avg_loss = torch.mean(torch.Tensor(total_losses))
                print(('{} Epoch {}/{}: train loss: {};'.format(
                    now(), epoch + 1, max_num_epochs, avg_loss)))

        avg_loss = torch.mean(torch.Tensor(total_losses))
        print(
            ('{} Epoch {}/{}: train loss: {};'.format(now(), epoch + 1,
                                                      max_num_epochs,
                                                      avg_loss)))

        # res = print_and_save_metric(model, sample_valid_data, criterion, epoch, max_num_epochs, 'valid')
        res = print_and_save_metric(model, sample_all_valid_data, criterion,
                                    epoch, max_num_epochs, 'valid_all',
                                    eval_batch_size, opt.use_gpu,
                                    abbvid_2_typeid)

        # Checkpoint whenever positive-class F1 on validation improves.
        if previous_valid_f1 < res['metric_dict']['1']['f1-score']:
            previous_valid_f1 = res['metric_dict']['1']['f1-score']
            res = print_and_save_metric(model, sample_test_data, criterion,
                                        epoch, max_num_epochs, 'test',
                                        eval_batch_size, opt.use_gpu,
                                        abbvid_2_typeid)
            all_pre, all_rec, thresholds = precision_recall_curve(
                res['all_true_y'], res['all_pred_p'])

            best_epoch = epoch + 1
            print("save models, best epoch: epoch {};".format(best_epoch))
            if not opt.debug_mode:
                save_pr(str(opt.result_dir), model.model_name, epoch, all_pre,
                        all_rec)
                model.save(opt.checkpoints_dir /
                           '{}_{}.pth'.format(opt.print_opt, str(epoch + 1)))

            # _ = print_and_save_metric(model, sample_manual_data, criterion, epoch, max_num_epochs, 'manual',
            #                           eval_batch_size, abbvid_2_typeid)

    print('best epoch is epoch {}.'.format(best_epoch))
예제 #22
0
파일: cms.py 프로젝트: hayate-hsu/cms
    def _gen_image_id_(self, *args):
        """Derive an image id by hashing the current timestamp plus *args*."""
        timestamp = util.now()
        return util.md5(timestamp, *args).hexdigest()
예제 #23
0
def run():
    """Guard-sleep analysis over the chronologically ordered event log.

    Part A: the guard with the most total minutes asleep, multiplied by
    his single sleepiest minute.  Part B: the guard most frequently asleep
    on the same minute, multiplied by that minute.
    """
    unsorted_lines, answer = {}, {'a': {}, 'b': {}}
    start_time = util.now()

    with open(os.path.join(INPUT_DIR, 'input04.dat'), 'r') as f:
        for line in f.readlines():
            entry_time, entry = REGEX['event'].search(line).groups()
            unsorted_lines[entry_time] = entry

    observations, guard, nap_start, total_sleepage = OrderedDict(), None, 0, {}

    # Timestamps sort lexicographically, which is also chronological here.
    for key in sorted(unsorted_lines.keys()):
        entry = unsorted_lines[key]
        # FIX: raw string — '[\s:]' relies on a deprecated escape sequence
        # (DeprecationWarning, SyntaxError in future Python versions).
        day, hour, minute = re.split(r'[\s:]', key)

        is_new_series = REGEX['guard'].search(entry)
        if is_new_series:
            guard = is_new_series.group(1)

        elif REGEX['sleep'].search(entry):
            nap_start = int(minute)

        elif REGEX['wake'].search(entry):
            if day not in observations:
                observations[day] = {
                    'guard': guard,
                    'is_asleep': [False for i in range(60)]
                }
            # Mark every minute of the just-ended nap as asleep.
            for i in range(nap_start, int(minute)):
                observations[day]['is_asleep'][i] = True

    # part A

    for entry in observations.values():
        guard_id = entry['guard']  # renamed from `id`, which shadowed the builtin
        total_sleepage[guard_id] = total_sleepage.get(guard_id, 0) + len(
            [y for y in entry['is_asleep'] if y])

    for k, v in total_sleepage.items():
        if v > answer['a'].get('sleepage', 0):
            answer['a'] = {'guard': k, 'sleepage': v}

    # Per-guard histogram: how often each minute 0-59 was slept through.
    histograms = {}

    for observed in observations.values():
        guard_id = observed['guard']
        if guard_id not in histograms:
            histograms[guard_id] = [0 for i in range(60)]
        for minute in [i for i, v in enumerate(observed['is_asleep']) if v]:
            histograms[guard_id][minute] += 1

    for i, v in enumerate(histograms[answer['a']['guard']]):
        if v > answer['a'].get('max_per_minute', 0):
            answer['a'].update({'worst_minute': i, 'max_per_minute': v})

    print('Part One answer is: %d' %
          (int(answer['a']['guard']) * answer['a']['worst_minute']))

    # part B

    def _most_minute(guard_id, all_histograms):
        """Find the minute a guard naps most, and how many times."""
        ranked = sorted([{
            'minute': i,
            'count': v
        } for i, v in enumerate(all_histograms[guard_id])],
                        key=lambda d: d['count'])
        return ranked[-1]

    ranked_likelies = sorted([{
        'guard': k,
        'most': _most_minute(k, histograms)
    } for k in histograms.keys()],
                             key=lambda e: e['most']['count'])
    answer['b'].update(ranked_likelies[-1])

    print('Part Two answer is: %d' %
          (int(answer['b']['guard']) * answer['b']['most']['minute']))

    print('whole answer dict is: %s' % str(answer))
    print('  elapsed: %f seconds' % util.elapsed_since(start_time))