Esempio n. 1
0
def main():
    """Draw a ``bid`` x ``lid`` heatmap coloured by each row's ``categorize`` result."""
    args = get_args(__file__)

    # NOTE(review): five legend labels but only four colours are listed —
    # confirm how legend() pairs them up.
    legend_labels = [
        'N/A', 'Id. Física', 'Id. Historiográfica', 'Desconocido', 'Perdido'
    ]
    palette_names = ['light grey', 'medium green', 'denim blue', 'pale red']

    df = pd.DataFrame(read_table(args.table))
    df['ident'] = df.apply(categorize, axis=1)

    # Keep the first row of every (bid, lid) pair so the pivot has unique
    # cells; combinations that never occur are filled with 0.
    unique_rows = df.drop_duplicates(['bid', 'lid'], keep='first')
    grid = unique_rows.pivot(index='bid', columns='lid', values='ident')
    grid = grid.fillna(0)

    palette = sns.xkcd_palette(palette_names)

    figure, axes = plt.subplots()
    heatmap_options = dict(
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(palette),
        cbar=False,
    )
    sns.heatmap(grid, **heatmap_options)

    set_axis(axes, grid, as_letters(set(df.year.values)), 'Libros')
    legend(figure, axes, legend_labels, palette)
    plotting(plt, args)
Esempio n. 2
0
def plot(args):
    """Draw the heatmap configured by ``args.color_by``, optionally annotated.

    Args:
        args: parsed command-line namespace. This function reads ``table``,
            ``color_by`` and ``annotated`` and passes the namespace on to
            ``plotting``.
    """
    df = pd.DataFrame(read_table(args.table))
    configurer = configs[args.color_by]
    data, labels, colors = configurer(df, args.color_by)

    f, ax = plt.subplots()

    # sns.heatmap returns the Axes it drew on.
    heat_ax = sns.heatmap(
        data,
        ax=ax,
        linewidth=0.5,
        cmap=ListedColormap(colors),
        cbar=False)

    set_axis(ax, data, as_letters(set(df.year.values)), ylabel='Posición')
    legend(f, ax, labels, colors)

    if args.annotated and args.color_by in ['bid', 'year']:
        # Look the short title up in a frame indexed by bid. The original
        # code built this frame but then indexed the raw ``df`` (still on
        # its own row index) with a bid, returning the wrong row or raising.
        # NOTE(review): assumes ``data`` is indexed by bid for these
        # colourings — confirm against the configurers.
        df_by_bid = df.drop_duplicates('bid').set_index('bid')
        for bid in data.index:
            heat_ax.text(15, bid, df_by_bid.loc[bid, 'short'], fontsize=8)

    plotting(plt, args)
Esempio n. 3
0
def main():
    """Draw a ``pos`` x ``lid`` heatmap of ``bid`` values with bid 25 singled out."""
    args = get_args(__file__)
    df = pd.DataFrame(read_table(args.table))

    # bid 25 is swapped for the value 1000 so it is the one that goes in
    # red; every other row keeps its own bid.
    df['track'] = df.apply(
        lambda row: 1000 if row['bid'] == 25 else row['bid'], axis=1)

    grid = df.pivot(index='pos', columns='lid', values='track')
    palette = sns.xkcd_palette(['light blue', 'bright red'])

    figure, axes = plt.subplots()
    heatmap_options = dict(
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(palette),
        cbar=False,
    )
    sns.heatmap(grid, **heatmap_options)

    set_axis(axes, grid, as_letters(set(df.year.values)), 'Posición')
    plotting(plt, args)
Esempio n. 4
0
def main():
    """Run the detector(s) selected on the command line and write the results.

    Expects exactly three command-line arguments: the data file name, the
    output directory name and the tool id. Tool id '1' runs the pattern
    search followed by the non-conforming-pattern check, '2' runs
    ``RandDMVD.find_disguised_values``, '3' runs ``OD.detect_outliers`` and
    '4' runs all of them in that order.

    Exits with status 1 on a wrong argument count, when the output directory
    cannot be created, when reading the table raises ``OSError``, or on an
    unknown tool id.
    """
    if len(sys.argv) != 4:
        print("Wrong number of arguments .. entered (", len(sys.argv), ")")
        print("Usage (", sys.argv[0], "): <data file name>",
              " <output directory name> <Tool ID>")
        sys.exit(1)

    table_name, out_directory, tool_id = sys.argv[1:4]

    # Create the output directory on demand.
    if not os.path.isdir(out_directory):
        try:
            os.makedirs(out_directory)
        except OSError:
            print("Error creating directory!")
            sys.exit(1)

    # Load the input table.
    try:
        T = common.read_table(table_name)
    except OSError:
        print("Error reading csv!")
        sys.exit(1)

    if tool_id not in ('1', '2', '3', '4'):
        print("Unkown option ..", tool_id)
        sys.exit(1)

    sus_dis_values = []
    # Only the pattern search produces patterns. Without this default, tool
    # ids '2' and '3' reached the JSON output call below with ``ptrns``
    # unbound and died with a NameError.
    # NOTE(review): assumes print_output_data_json accepts an empty pattern
    # collection — confirm against common.
    ptrns = []

    if tool_id in ('1', '4'):
        sus_dis_values, ptrns = patterns.find_all_patterns(T, sus_dis_values)
        sus_dis_values = DV_Detector.check_non_conforming_patterns(
            T, sus_dis_values)
    if tool_id in ('2', '4'):
        sus_dis_values = RandDMVD.find_disguised_values(T, sus_dis_values)
    if tool_id in ('3', '4'):
        sus_dis_values = OD.detect_outliers(T, sus_dis_values)

    common.print_output_data(out_directory, table_name, sus_dis_values)
    common.print_output_data_json(out_directory, table_name, sus_dis_values,
                                  ptrns)
Esempio n. 5
0
def main():
    """Draw a ``pos`` x ``lid`` heatmap of ``bid`` values in shades of navy."""
    args = get_args(__file__)

    df = pd.DataFrame(read_table(args.table))
    grid = df.pivot(index='pos', columns='lid', values='bid')

    # One shade per row of the pivoted table.
    shades = sns.light_palette('navy', len(grid))

    figure, axes = plt.subplots()
    heatmap_options = dict(
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(shades),
        cbar=False,
    )
    sns.heatmap(grid, **heatmap_options)

    set_axis(axes, grid, as_letters(set(df.year.values)), 'Posición')
    plotting(plt, args)
Esempio n. 6
0
def main():
    """Draw one bar subplot of ``height`` by ``pos`` for each ``lid`` column."""
    args = get_args(__file__)

    # TODO fix axis
    table = pd.DataFrame(read_table(args.table))
    df = table.pivot(index='pos', columns='lid').fillna(float('NaN'))

    bar_axes = df.height.plot(
        kind='bar',
        subplots=True,
        title='Altura de libro por inventario y posición',
        grid=True,
    )

    # Blank the per-subplot titles and y labels, and pin each legend to the
    # upper right of its subplot.
    for axes in bar_axes:
        axes.set_title('')
        axes.set_ylabel('')
        axes.legend(loc='upper right', bbox_to_anchor=(1.1, 1))

    # On the last subplot only, show every fifth x tick label, starting
    # with the first.
    shown = set(range(1, len(df), 5))
    tick_labels = bar_axes[-1].xaxis.get_ticklabels()
    for number, tick_label in enumerate(tick_labels, start=1):
        tick_label.set_visible(number in shown)

    plotting(plt, args)
Esempio n. 7
0
def main():
    """Draw the heatmap built by ``categorical_by`` on the ``lang`` column."""
    args = get_args(__file__)

    # Category codes, their legend labels and their colours, listed in the
    # same order.
    codes = ['NA', 'LAT', 'ROM', 'FRAN']
    legend_labels = ['N/A', 'Latín', 'Romance', 'Francés']
    palette_names = ['light grey', 'pale red', 'medium green', 'denim blue']

    df = pd.DataFrame(read_table(args.table))
    grid = categorical_by(df, 'lang', codes)
    palette = sns.xkcd_palette(palette_names)

    figure, axes = plt.subplots()
    heatmap_options = dict(
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(palette),
        cbar=False,
    )
    sns.heatmap(grid, **heatmap_options)

    set_axis(axes, grid, as_letters(set(df.year.values)), 'Libros')
    legend(figure, axes, legend_labels, palette)
    plotting(plt, args)
Esempio n. 8
0
def main():
    """Draw the heatmap built by ``categorical_by`` on the ``topic`` column."""
    args = get_args(__file__)

    # Category codes, their legend labels and their colours, listed in the
    # same order.
    names = ['NA', 'REL', 'CRONIC', 'ANTI']
    labels = ['N/A', 'Religioso', 'Crónicas y Leyes', 'Historia Antigua']
    colornames = ['light grey', 'pale red', 'medium green', 'denim blue']

    df = pd.DataFrame(read_table(args.table))
    data = categorical_by(df, 'topic', names)
    colors = sns.xkcd_palette(colornames)

    f, ax = plt.subplots()

    sns.heatmap(data,
                ax=ax,
                square=True,
                linewidth=0.5,
                cmap=ListedColormap(colors),
                cbar=False)

    # Pass the distinct years, as every other plot entry point does; the
    # raw column repeats each year once per row.
    # NOTE(review): as_letters is not visible here — confirm it expects the
    # distinct years.
    set_axis(ax, data, as_letters(set(df.year.values)), ylabel='Posición')
    legend(f, ax, labels, colors)
    plotting(plt, args)
Esempio n. 9
0
def main():
    """Draw a ``pos`` x ``lid`` heatmap of ``height`` values in shades of green."""
    args = get_args(__file__)

    df = pd.DataFrame(read_table(args.table))

    # One shade per whole unit up to the largest height, never fewer than
    # zero shades.
    shade_count = max(int(df['height'].max()), 0)

    grid = df.pivot(index='pos', columns='lid', values='height')
    grid = grid.fillna(0)
    shades = sns.light_palette('green', shade_count)

    figure, axes = plt.subplots()
    heatmap_options = dict(
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(shades),
        cbar=False,
    )
    sns.heatmap(grid, **heatmap_options)

    set_axis(axes, grid, as_letters(set(df.year.values)), 'Tamaño')
    plotting(plt, args)
Esempio n. 10
0
def main():
    """Draw a ``bid`` x ``lid`` heatmap of ``year`` values in shades of red."""
    args = get_args(__file__)

    df = pd.DataFrame(read_table(args.table))

    # Keep the first row of every (bid, lid) pair so the pivot has unique
    # cells; combinations that never occur are filled with False.
    unique_rows = df.drop_duplicates(['bid', 'lid'], keep='first')
    grid = unique_rows.pivot(index='bid', columns='lid', values='year')
    grid = grid.fillna(False)

    # One shade per row of the pivoted table.
    shades = sns.light_palette('red', len(grid))

    figure, axes = plt.subplots()
    heatmap_options = dict(
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(shades),
        cbar=False,
    )
    sns.heatmap(grid, **heatmap_options)

    set_axis(axes, grid, as_letters(set(df.year.values)), 'Libros')
    plotting(plt, args)
Esempio n. 11
0
# -*- coding: utf-8 -*-
'''
Description:
 Convert A into A'.
 A' is A with the deleted rows removed and the remaining rows sorted
 in lexicographic order.

Input/output: J(A) -> J(A')
'''

from common import read_table, write_table

# Read A.
a = read_table()

# Drop the deleted rows (those starting with '*') and sort what is left
# in lexicographic order.
a1 = sorted(row for row in a if not row.startswith('*'))

# Write A'.
write_table(a1)
Esempio n. 12
0
        # f の定義域に pid が含まれていなければ,f に pid |--> oid を追加
        if pid not in f:
            f[pid] = oid
    return f

def count_if(seq, condition):
    '''
    Return the number of elements of seq for which condition is truthy.
    '''
    matches = 0
    for item in seq:
        if condition(item):
            matches += 1
    return matches

def to_map(r):
    '''
    Return r itself, unchanged.
    '''
    unchanged = r
    return unchanged

# Inputs, read in this order: R, T, A, F'
r = read_table()   # R
t = read_table()   # T
a = read_table()   # A
f1 = read_table()  # F'

# Each row of R is a comma-separated pair; the first field becomes the key.
r_dict = dict(row.split(',') for row in r)

f = make_pid2oid(t, a)  # F
n = len(f1)  # |F'|

# Number of correct re-identification guesses: rows "pid,oid" of F' whose
# oid is the one F maps that pid to.
suc = count_if(
    (row.split(',') for row in f1),
    lambda pid_oid: f[pid_oid[0]] == pid_oid[1],
)

# 出力
s = [
    ('1' if suc >= int(r_dict[str(n)]) else '0'), # 安全でないかどうか
Esempio n. 13
0
#!/usr/bin/env python3

# T,A' -> F'

import re
import random
from common import read_table, write_table

## input T,A'
t = read_table()
a1 = read_table()

cids = set()        # every cid seen in T
pids = set()        # every pid seen in A'
cid_stocks = {}     # cid -> stock ids of its rows in T, in input order
item_pids = {}      # stock code -> pids of the rows of A' that mention it
pid_trnum = {}      # pid -> number of rows of A' carrying that pid

# A stock field wrapped in braces holds several codes separated by ';'.
_stock_group_re = re.compile(r"\{(.*)\}")

for tr in t:
    cid,date,stockid,price,num = tr.split(',')
    cids.add(cid)
    # Append in place: rebuilding the list with ``+`` on every row copied
    # it each time, which is quadratic in the rows per cid.
    cid_stocks.setdefault(cid, []).append(stockid)

for a_tr in a1:
    pid,date,stockid,price,num = a_tr.split(',')
    pids.add(pid)
    pid_trnum[pid] = pid_trnum.get(pid,0)+1
    m = _stock_group_re.match(stockid)
    sclist = m.group(1).split(";") if m else [stockid]
    for sc in sclist:
        item_pids.setdefault(sc, []).append(pid)
Esempio n. 14
0
def main():
    """Scatter-plot each ``bid``'s ``pos`` in one ``lid`` against another.

    Command-line options added on top of the shared parser:
        --first / --second: the two ``lid`` values to compare (defaults 3
            and 4); they become the x and y axes.
        --annotated: write the ``short`` field next to every point.
        --iterations: ``lim`` passed to ``adjust_text`` (default 10).
        --color-by: metadata field used for the point colours. Without it,
            points are coloured by whether either of the two positions is
            missing (filled with 0).
    """
    parser = get_parser()
    parser.add_argument('--first', default=3, type=int)
    parser.add_argument('--second', default=4, type=int)
    parser.add_argument('--annotated', action='store_true')
    parser.add_argument('--iterations', default=10, type=int)
    parser.add_argument('--color-by')
    args = get_args(__file__, parser)

    columns = [args.first, args.second]
    df = pd.DataFrame(read_table(args.table))
    # reindex(columns=...) replaces DataFrame.reindex_axis, which was
    # removed in pandas 1.0.
    data = df[(df.lid == args.first) | (df.lid == args.second)]\
        .pivot(index='bid', columns='lid', values='pos')\
        .sort_values(by=args.first)\
        .fillna(0)\
        .reindex(columns=columns)  # assure column order

    # Reindex by position
    meta = Metadata(index='pos',
                    dfs=[df[df.lid == args.first], df[df.lid == args.second]])

    palette_name = None
    variable_name = None
    title = 'Orden/Orden inventarios {} y {}'\
        .format(to_letter(args.first), to_letter(args.second))
    if not args.color_by:
        # Color based on whether the row has a position in both columns or
        # is missing from one of them
        data['color'] = data.apply(
            lambda row: any(not row[c] for c in columns), 1)
    else:
        variable_name = variable_names.get(args.color_by, args.color_by)
        title += ' variable "{}"'.format(variable_name)
        data['color'] = data.apply(
            lambda row: meta.get_field(args.color_by, *
                                       [row[c] for c in columns]), 1)

    # Group numerical values into bins/categories: 10 for height, 5 for area
    color_sorter = None
    if args.color_by in ['area', 'height']:
        palette_name = 'YlOrRd'  # yellow to red
        bins = 10 if args.color_by == 'height' else 5
        data['color'] = pd.cut(data['color'], bins, precision=0)

        def color_sorter(e):
            # Order the bins by the lower bound parsed from their text
            # form, e.g. "(10.0, 20.0]".
            return float(str(e).strip('(').strip(']').split(', ', 1)[0])

    # Assure repeatable colors by setting the category-color map
    # before lmplot does it randomly on each run and confuses us
    values = sorted(data['color'].unique(), key=color_sorter)
    colors = sns.color_palette(palette=palette_name, n_colors=len(values))
    palette = dict(zip(values, colors))

    # Use str as column names, otherwise lmplot goes wild
    columns = list(map(str, columns))
    data.columns = columns + ['color']

    # x and y are passed by keyword: current seaborn releases reject them
    # as positional arguments, and older releases accept the keywords too.
    p = sns.lmplot(x=columns[0],
                   y=columns[1],
                   data=data,
                   hue='color',
                   palette=palette,
                   legend=False,
                   legend_out=True,
                   fit_reg=False,
                   size=7,
                   aspect=1.3)

    # Set top title and space for it
    plt.suptitle(title)
    p.fig.subplots_adjust(top=0.92)

    p.set(ylim=(0, None), xlim=(0, None))

    # Set legend outside graph at center right
    if args.color_by:
        p.fig.subplots_adjust(right=0.85)
        plt.legend(bbox_to_anchor=(1.18, 0.7),
                   borderaxespad=0.,
                   title=variable_name)

    if args.annotated:
        texts = [
            p.ax.text(
                first,
                second,
                meta.get_field('short', first, second),
                fontsize=8,
            ) for first, second, color in data.values
        ]

        # Nudge the labels apart, drawing a line back to each point.
        adjust_text(texts,
                    force_points=1.5,
                    lim=args.iterations,
                    arrowprops=dict(arrowstyle="-", color='r', alpha=0.8))

    plotting(plt, args)