def do_calc(df, result_col='ratio', ndigits=2, print_n=100000):
    """Append a per-row ratio column to ``df`` and return the frame.

    For every row the value ``(z - y) / (y - x)`` is computed from the
    row's ``x``, ``y`` and ``z`` entries and rounded with ``round``.  The
    frame is modified in place: the results are stored under
    ``result_col`` and the same object is returned.

    NOTE(review): this file also contains a second ``do_calc`` (taking a
    filename) further down; if both live in the same module the later
    definition replaces this one -- confirm which is intended.

    Args:
        df: DataFrame providing ``x``, ``y`` and ``z`` for each row.
        result_col: Name of the column that receives the ratios.
        ndigits: Second argument handed to ``round``.
        print_n: Forwarded to ``profiled`` as ``print_n``.

    Returns:
        ``df`` with ``result_col`` set.
    """
    total_rows = len(df)

    @profiled(total=total_rows, print_n=print_n, name='calc_counter')
    def calc(s):
        x, y, z = s['x'], s['y'], s['z']
        ratio = (z - y) / (y - x)
        return round(ratio, ndigits)

    # Phase 1: evaluate the ratio row by row.
    print_msg('start calculation...')
    print_msg('total rows = {}'.format(total_rows))
    stopwatch = Timer()
    ratios = df.apply(calc, axis=1)
    calc_seconds = stopwatch.elapse(2)
    print_msg('finish calculation ({}s)'.format(calc_seconds))

    # Phase 2: attach the results to the frame.
    print_msg('start appending result column...')
    stopwatch.reset()
    df[result_col] = ratios
    append_seconds = stopwatch.elapse(2)
    print_msg('finish appending result column ({}s)'.format(append_seconds))

    return df
def main():
    """Run the pipeline end to end.

    Steps, in order:

    1. If ``data/org_data.csv`` does not exist, build it from
       ``data/data.xlsx`` with ``gen_org_data``.
    2. Run ``do_calc`` on that CSV to obtain a frame with a ``ratio``
       column.
    3. Sort the frame by ``ratio``; write it to
       ``result/data_sorted.csv`` only when ``save_data_sorted`` is set
       (it is hard-coded to ``False`` here).
    4. Group the sorted frame by ``ratio``, pass the groups to
       ``find_max_occur`` and hand its result to ``save_result`` with
       ``result/max_occur`` as the target directory.

    Progress and per-step timings are reported through ``print_msg``.
    """
    # Input locations.
    src_dir = 'data'
    xlsx_path = os.path.join(src_dir, 'data.xlsx')
    csv_path = os.path.join(src_dir, 'org_data.csv')

    # Output locations.
    out_dir = 'result'
    sorted_csv_path = os.path.join(out_dir, 'data_sorted.csv')
    max_occur_dir = os.path.join(out_dir, 'max_occur')

    # Switch for dumping the sorted frame to disk.
    save_data_sorted = False

    print_msg('start running...')
    total_timer = Timer()

    # The CSV is derived from the spreadsheet only when it is missing.
    csv_missing = not os.path.exists(csv_path)
    if csv_missing:
        gen_org_data(out_file=csv_path, xlsx_file=src_file_or(xlsx_path),
                     print_n=100000) if False else gen_org_data(
                         out_file=csv_path,
                         xlsx_file=xlsx_path,
                         print_n=100000)

    frame = do_calc(csv_path, ndigits=2, print_n=100000)

    print_msg('start sorting values...')
    step_timer = Timer()
    data_sorted = frame.sort_values(by='ratio')
    sort_seconds = step_timer.elapse(2)
    print_msg('finish sorting values ({}s)'.format(sort_seconds))

    if save_data_sorted:
        print_msg('start saving sorted data to file...')
        step_timer.reset()
        data_sorted.to_csv(sorted_csv_path, index=False)
        save_seconds = step_timer.elapse(2)
        print_msg('finish saving sorted data to file ({}s)'.format(
            save_seconds))

    print_msg('start grouping data...')
    step_timer.reset()
    by_ratio = data_sorted.groupby('ratio')
    group_seconds = step_timer.elapse(2)
    print_msg('finish grouping data ({}s)'.format(group_seconds))

    max_occur = find_max_occur(by_ratio, print_n=10000)
    save_result(max_occur, max_occur_dir, print_n=10)

    print_msg('finish running ({}s)'.format(total_timer.elapse(2)))
def do_calc(filename, result_col='ratio', ndigits=2, print_n=100000):
    """Load a CSV file and append a column holding ``(z - y) / (y - x)``.

    The file is parsed exactly once, with ``pd.read_csv``, and the ratios
    are computed from the rows of the resulting frame.  Deriving both the
    frame and the ratios from the same parse guarantees exactly one ratio
    per row, so the new column always lines up with the data; a row with
    an empty or non-numeric cell gets a NaN ratio.

    The first three columns of the file are taken, by position, as ``x``,
    ``y`` and ``z``.

    Args:
        filename: Path of the CSV file to read with ``pd.read_csv``.
        result_col: Name of the column that receives the ratios.
        ndigits: Second argument handed to ``round``.
        print_n: Forwarded to ``profiled`` as ``print_n``.

    Returns:
        The loaded DataFrame with ``result_col`` added.
    """
    timer = Timer()

    print_msg('start loading org data...')
    df = pd.read_csv(filename)
    print_msg('finish loading org data ({}s)'.format(timer.elapse(2)))

    # The frame itself is the source of truth for the row count.
    row_number = len(df)
    print_msg('total rows = {}'.format(row_number))

    @profiled(total=row_number, print_n=print_n, name='calc_counter')
    def calc(s):
        x = s['x']
        y = s['y']
        z = s['z']
        return round((z - y) / (y - x), ndigits)

    def column_as_float64(position):
        # Unparseable cells become NaN instead of dropping the row, and the
        # values are handed to calc as np.float64 scalars (iterating a
        # float64 ndarray yields np.float64), so y == x produces inf/nan
        # rather than raising ZeroDivisionError.
        numeric = pd.to_numeric(df.iloc[:, position], errors='coerce')
        return numeric.to_numpy(dtype=np.float64)

    print_msg('start calculation...')
    timer.reset()
    ratios = [
        calc({'x': x, 'y': y, 'z': z})
        for x, y, z in zip(column_as_float64(0),
                           column_as_float64(1),
                           column_as_float64(2))
    ]
    print_msg('finish calculation ({}s)'.format(timer.elapse(2)))

    print_msg('start appending result column...')
    timer.reset()
    df[result_col] = ratios
    print_msg('finish appending result column ({}s)'.format(timer.elapse(2)))
    return df