def crawling(self):
        """Query every tracked index, save the workbook and chart the collected values.

        ``self.get_price`` is called once per symbol with two shared lists and
        the module-level worksheet (it presumably fills the lists in place; its
        body is not visible here). Afterwards the workbook is saved and the two
        lists are drawn as a horizontal bar chart: the first list supplies the
        tick labels, the second the bar lengths.
        """
        symbols = (
            "DJI@DJI",
            "LNS@FTSE100",
            "NAS@IXIC",
            "SPI@SPX",
            "SHS@000001",
            "HSI@HSI",
            "PAS@CAC40",
            "STX@SX5E",
            "IDI@JKSE",
            "NII@NI225",
            "XTR@DAX30",
            "BRI@BVSP",
            "RUI@RTSI",
            "MYI@KLSE",
            "NAS@SOX",
        )
        labels = []
        changes = []
        sheet = naver_finance.new_sheet
        for symbol in symbols:
            self.get_price(symbol, labels, changes, sheet)

        naver_finance.wb.save('D:\\naver_finance.xlsx')

        # Use the Malgun Gothic font file so the Korean axis label renders.
        fm.get_fontconfig_fonts()
        malgun_path = 'C:/Windows/Fonts/malgun.ttf'
        pyplot.rc('font', family=fm.FontProperties(fname=malgun_path).get_name())
        pyplot.rcParams["figure.figsize"] = (14, 7)

        positions = np.arange(len(labels))
        pyplot.barh(positions, changes, align='center', height=0.7)
        pyplot.yticks(positions, labels)
        pyplot.xlabel('전일대비 변동량')
        pyplot.show()
Example #2
0
def composer_histogram(composers):
    """Plot the share of credits of every composer credited more than 20 times.

    Each non-missing entry of ``composers`` is parsed with
    ``ast.literal_eval`` and every name it yields is counted once. Names with
    more than 20 credits are shown as a bar chart of their percentage of all
    counted credits, and are also stored in the module-level ``composer_set``.

    Args:
        composers: Iterable whose entries are either NaN (skipped) or a
            string literal that evaluates to an iterable of composer names.
    """
    global composer_set
    composer_cnt = Counter()
    sz = 0  # number of entries that actually carried composer data
    for composer in composers:
        # Test NaN by value: an identity check against ``np.nan`` misses NaN
        # objects created elsewhere (e.g. ``float('nan')``), which would then
        # be handed to ``ast.literal_eval`` and raise.
        if isinstance(composer, (float, np.floating)) and np.isnan(composer):
            continue
        sz += 1
        for name in ast.literal_eval(composer):
            composer_cnt[name] += 1

    total_credits = sum(composer_cnt.values())
    shares = sorted(
        ((name, round(count / total_credits * 100, 2))
         for name, count in composer_cnt.items()
         if count > 20),
        key=lambda pair: pair[1],
        reverse=True)
    xdata = [name for name, _ in shares]
    ydata = [share for _, share in shares]
    composer_set = set(xdata)

    sns.set_style("whitegrid")
    fm.get_fontconfig_fonts()
    font_location = r'C:/Windows/Fonts/NanumBarunGothic.ttf'
    font_name = fm.FontProperties(fname=font_location).get_name()
    mpl.rc('font', family=font_name)

    g = sns.barplot(x=xdata, y=ydata)
    g.set_title("20곡 이상을 작업한 작곡가(총{}곡)(%)".format(sz))
    g.set_xticklabels(xdata, rotation=70)
    # Write each bar's value at the top of the bar.
    for p in g.patches:
        height = p.get_height()
        g.text(p.get_x() + p.get_width() / 2.,
               height,
               '{:1.2f}'.format(height),
               ha="center")
    plt.show()
Example #3
0
def set_fonts():
    """Make NanumSquare the matplotlib font family.

    On POSIX systems the family name is read from the OTF file under
    ``/Library/Fonts``; on any other system the family is selected by name.
    """
    if os.name != 'posix':
        rc('font', family='NanumSquare')
        return

    fm.get_fontconfig_fonts()
    nanum_otf = '/Library/Fonts/NanumSquareOTFRegular.otf'
    rc('font', family=fm.FontProperties(fname=nanum_otf).get_name())
Example #4
0
def set_fonts():
    """Configure matplotlib to draw text with the NanumSquare family.

    POSIX: the family name is taken from the font file in ``/Library/Fonts``.
    Other systems: the literal family name ``NanumSquare`` is used.
    """
    if os.name == 'posix':
        fm.get_fontconfig_fonts()
        properties = fm.FontProperties(
            fname='/Library/Fonts/NanumSquareOTFRegular.otf')
        family = properties.get_name()
    else:
        family = 'NanumSquare'
    rc('font', family=family)
Example #5
0
def set_fonts(name=None):
    """Select the matplotlib font family, preferring ``name`` where usable.

    On POSIX systems ``name`` is ignored and the family is read from the
    NanumSquare OTF file under ``/Library/Fonts``. Elsewhere ``name`` is
    applied when given; if the default ``FontProperties`` then reports
    ``DejaVu Sans``, or if no name was given, ``NanumSquareRound`` is used.

    Args:
        name: Optional font family name (only consulted on non-POSIX systems).
    """
    if os.name == 'posix':
        fm.get_fontconfig_fonts()
        otf_path = '/Library/Fonts/NanumSquareOTFRegular.otf'
        rc('font', family=fm.FontProperties(fname=otf_path).get_name())
        return

    if name is None:
        rc('font', family='NanumSquareRound')
        return

    rc('font', family=name)
    # A reported name of DejaVu Sans is treated as "requested family not
    # usable", so the round Nanum family is applied instead.
    if fm.FontProperties().get_name() == 'DejaVu Sans':
        rc('font', family='NanumSquareRound')
Example #6
0
    def plot_with_labels(low_dim_embs, labels, filename='tsne_' + str(word2vec_dim) + '.png'):
        """Scatter one 2-D point per label, annotate it, and save the figure.

        Args:
            low_dim_embs: 2-D array indexed as ``[row, :]``; each used row
                unpacks into an (x, y) pair.
            labels: Sequence of annotation texts; must not be longer than the
                number of rows in ``low_dim_embs``.
            filename: Path the figure is written to.
        """
        import matplotlib
        # The backend is selected before pyplot is imported below.
        matplotlib.use('Agg')

        # Font setup
        import matplotlib.pyplot as plt
        from matplotlib import font_manager, rc

        print("font_list: ", font_manager.get_fontconfig_fonts())
        nanum_bold = font_manager.FontProperties(fname='/Library/Fonts/NanumSquareBold.ttf')
        rc('font', family=nanum_bold.get_name())

        assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
        plt.figure(figsize=(18, 18))  # in inches
        for row, text in enumerate(labels):
            px, py = low_dim_embs[row, :]
            plt.scatter(px, py)
            plt.annotate(text,
                         xy=(px, py),
                         xytext=(5, 2),
                         textcoords='offset points',
                         ha='right',
                         va='bottom')

        plt.savefig(filename)
Example #7
0
def crnn_infer(axes, model_path="./weights/crnn.pth", visualize=False):
    """Run the CRNN recogniser over a sequence of images and return the labels.

    Args:
        axes: Iterable of images accepted by ``transforms.ToTensor``.
        model_path: Path of the state dict loaded into the CRNN (mapped to CPU).
        visualize: When truthy, each image is shown with its predicted label
            as the figure title.

    Returns:
        list[str]: One decoded label per input image, in input order.
    """
    fm.get_fontconfig_fonts()
    font_name = fm.FontProperties(fname="./data/fonts/H2GTRM.TTF").get_name()
    plt.rc('font', family=font_name)

    to_tensor = transforms.ToTensor()
    labels = []
    crnn = None

    for img in axes:
        # Keep an untouched copy for display before the image is converted.
        vis_img = deepcopy(img) if visualize else None
        batch = to_tensor(img)
        batch = batch.view(1, *batch.size())  # add a leading batch dimension

        if crnn is None:
            # Build the network and read the weights once, on the first image,
            # instead of re-reading the checkpoint for every image. Loading
            # stays lazy so an empty input still touches no model file.
            # NOTE(review): the model is never switched to eval mode and no
            # no_grad context is used — confirm whether that is intended.
            crnn = CRNN(64, 3, 1443, 256)
            crnn.load_state_dict(torch.load(model_path, map_location="cpu"))

        preds = crnn(batch)
        predict = mapping_seq(preds)
        label = "".join(get_seq2str(predict[0]))

        if visualize:
            plt.imshow(vis_img)
            plt.title("predict label : " + label)
            plt.show()

        labels.append(label)

    return labels
Example #8
0
def genre_histogram(genres):
    """Plot the percentage of each genre and fill ``genre_fix_dict``.

    Every distinct genre string is mapped in the module-level
    ``genre_fix_dict``: a string made of exactly two comma-separated parts
    maps to whichever part is itself the more frequent entry (the second part
    on a tie); otherwise a genre below 0.9 % maps to ``"etc"`` and any other
    genre maps to itself. The percentages are then shown as a bar chart.

    Args:
        genres: Iterable of genre strings.
    """
    genre_cnt = Counter(list(genres))
    s = sum(genre_cnt.values())

    ranked = sorted(
        ((label, round(count / s * 100, 2))
         for label, count in genre_cnt.items()),
        key=lambda pair: pair[1],
        reverse=True)
    xdata = [label for label, _ in ranked]
    ydata = [share for _, share in ranked]

    for genre, share in ranked:
        parts = genre.split(",")
        if len(parts) == 2:
            first, second = parts
            if genre_cnt[first] > genre_cnt[second]:
                genre_fix_dict[genre] = first
            else:
                genre_fix_dict[genre] = second
        elif share < 0.9:
            genre_fix_dict[genre] = "etc"
        else:
            genre_fix_dict[genre] = genre

    sns.set_style("whitegrid")
    fm.get_fontconfig_fonts()
    nanum_path = r'C:/Windows/Fonts/NanumBarunGothic.ttf'
    mpl.rc('font', family=fm.FontProperties(fname=nanum_path).get_name())

    chart = sns.barplot(x=xdata, y=ydata)
    chart.set_title("장르(총{}곡)(%)".format(s))
    chart.set_xticklabels(xdata, rotation=70)
    # Write each bar's value at the top of the bar.
    for bar in chart.patches:
        top = bar.get_height()
        chart.text(bar.get_x() + bar.get_width() / 2.,
                   top,
                   '{:1.2f}'.format(top),
                   ha="center")
    plt.show()
Example #9
0
def plot_word_embeddng(wv_model_ko):
    """Project up to 500 word vectors to 2-D with t-SNE and save a labelled plot.

    The vectors are read from ``wv_model_ko.wv.syn0`` and the matching words
    from ``wv_model_ko.wv.index2word``. The figure is written to
    ``./data_out/tsne_<args.word2vec_dim>.png``. If scikit-learn or pyplot
    cannot be imported, a hint is printed instead.

    Args:
        wv_model_ko: Model object exposing ``wv.syn0`` and ``wv.index2word``.
    """
    final_embeddings = wv_model_ko.wv.syn0
    labels = wv_model_ko.wv.index2word

    import matplotlib
    # The backend is selected before pyplot is imported further down.
    matplotlib.use('Agg')
    from matplotlib import font_manager, rc

    print("font_list: ", font_manager.get_fontconfig_fonts())
    font_name = font_manager.FontProperties(
        fname='/Library/Fonts/NanumSquareBold.ttf').get_name()
    rc('font', family=font_name)

    def plot_with_labels(low_dim_embs,
                         labels,
                         filename='./data_out/tsne_' + str(args.word2vec_dim) +
                         '.png'):
        """Scatter and annotate one point per label, then save to ``filename``."""
        assert low_dim_embs.shape[0] >= len(
            labels), "More labels than embeddings"
        plt.figure(figsize=(18, 18))  # in inches
        for i, label in enumerate(labels):
            x, y = low_dim_embs[i, :]
            plt.scatter(x, y)
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(5, 2),
                         textcoords='offset points',
                         ha='right',
                         va='bottom')

        plt.savefig(filename)

    try:
        from sklearn.manifold import TSNE
        import matplotlib.pyplot as plt

        tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
        plot_only = 500
        low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
        # Slice the labels the same way as the embeddings: indexing
        # range(plot_only) raised IndexError for vocabularies under 500 words.
        labels = list(labels[:plot_only])
        plot_with_labels(low_dim_embs, labels)

    except ImportError:
        print(
            "Please install sklearn, matplotlib, and scipy to visualize embeddings."
        )
def main(argv):
    """
    The main driving function.

    With no arguments the welcome text is printed and the program quits.
    Otherwise the tool path is set, the available font names are printed,
    ``argv[0]`` is converted to ``int`` in place and used to pick one of the
    LSDMapWrappers plotting routines (1: hillshade, 2: basins, 3: channels),
    run on the data set "WA" in the current working directory.

    Author: SMM
    Date: 08/01/2018
    """
    if len(argv) == 0:
        print_welcome()
        quit()

    print("Let me load the LSDMappingTools functions for you.")
    set_path()

    print("Let me check the fonts. They are:")
    import matplotlib.font_manager as fm
    font_names = []
    for font_file in fm.get_fontconfig_fonts():
        font_names.append(fm.FontProperties(fname=font_file).get_name())
    print(font_names)

    fm.findfont('Liberation Sans', rebuild_if_missing=True)

    print("The arguments are: ")
    print(argv)

    # import Mapping tools
    import LSDMapWrappers as LSDMW
    DataDir = os.getcwd() + os.sep
    DataFname = "WA"

    choice = int(argv[0])
    argv[0] = choice  # the caller's list is updated, as before

    # choice -> (progress message, name of the LSDMapWrappers routine)
    actions = {
        1: ("Getting basic hillshade", "SimpleHillshade"),
        2: ("Plotting some basins", "PrintBasins"),
        3: ("Plotting the channels", "PrintChannels"),
    }
    if choice in actions:
        message, routine_name = actions[choice]
        print(message)
        getattr(LSDMW, routine_name)(DataDir, DataFname)
    else:
        print("I didn't understand what you wanted.")
        print("Your choice was:" + str(choice))
Example #11
0
def _standardize_heights(rows):
    """Rescale column 1 (pZ) of every row in place using columns 3 (st) and 4 (sb).

    After the rescale a pZ equal to st becomes +1 and a pZ equal to sb
    becomes -1.
    """
    for row in rows:
        st = row[3]
        sb = row[4]
        row[1] = (row[1] - (st + sb) / 2) / ((st - sb) / 2)


def _scatter_by_count(points, infos, specify_count):
    """Scatter rows whose ``infos[:, 1]`` equals ``specify_count`` in red, the rest in blue."""
    hit = np.where(infos[:, 1] == specify_count)
    miss = np.where(infos[:, 1] != specify_count)
    plt.scatter(points[hit, 0],
                points[hit, 1],
                color='#ef2926',
                alpha=.5,
                s=np.pi * 50,
                label='{}구'.format(specify_count))
    plt.scatter(points[miss, 0],
                points[miss, 1],
                color='#3245ef',
                alpha=.5,
                s=np.pi * 50)


def _in_view(row, view):
    """Return True when the point (row[0], row[1]) lies strictly inside ``view``."""
    lb, rb, bb, tb = view
    return lb < row[0] < rb and bb < row[1] < tb


def _put_text(ax, row, text):
    """Write ``text`` in white, centred slightly below the point of ``row``."""
    ax.text(row[0],
            row[1] - 0.05,
            text,
            color='white',
            fontsize=10,
            horizontalalignment='center')


def plot_calls(values,
               miscs,
               title=None,
               specify_count=0,
               print_std=False,
               no_color=False,
               call_option=None,
               legends=False):
    """Draw pitch locations over a 3x3 zone grid and show the figure.

    Args:
        values: 2-D array with columns pX, pZ, result, st, sb.
        miscs: 2-D array with columns inn, bc, stuff, speed; rows are
            parallel to ``values``.
        title: Optional figure title.
        specify_count: When > 0, rows whose ``miscs[:, 1]`` equals this value
            are highlighted in red and annotated; all other rows are blue.
        print_std: When ``True``, pZ is rescaled per row with st/sb and the
            zone is drawn from -1 to +1.
        no_color: When ``True``, everything is drawn in red and no legend is
            shown.
        call_option: ``None`` to plot result codes 2 and 1 (labelled
            '스트라이크' and '볼'), or a ``Results`` member name, or a list of
            such names, to plot only those results.
        legends: When ``True`` (and ``no_color`` is ``False``) a legend is
            drawn below the plot.

    Raises:
        SystemExit: ``call_option`` is neither ``None``, a list nor a string.
    """
    # pX, pZ, result, st, sb
    # inn, bc, stuff, speed
    from matplotlib import font_manager, rc
    import os
    if os.name == 'posix':
        import matplotlib.font_manager as fm
        fm.get_fontconfig_fonts()
        font_location = '/Library/Fonts/NanumSquareOTFRegular.otf'
        font_name = fm.FontProperties(fname=font_location).get_name()
        rc('font', family=font_name)
    else:
        rc('font', family='NanumSquare')

    lb = -1.5  # leftBorder
    rb = +1.5  # rightBorder
    tb = +4.0  # topBorder
    bb = +1.0  # bottomBorder

    ll = -17 / 24  # leftLine
    rl = +17 / 24  # rightLine
    tl = +3.325  # topLine
    bl = +1.579  # bottomLine

    oll = -17 / 24 - 1 / 8  # outerLeftLine
    orl = +17 / 24 + 1 / 8  # outerRightLine
    otl = +3.325 + 1 / 8  # outerTopLine
    obl = +1.579 - 1 / 8  # outerBottomLine

    if print_std is True:
        # Standardised heights: the zone itself spans -1..+1.
        tb = +21 / 12
        bb = -21 / 12
        tl = +1.0
        bl = -1.0
        otl = +1.0 + 1 / 8
        obl = -1.0 - 1 / 8

    view = (lb, rb, bb, tb)
    red = '#ef2926'
    blue = '#3245ef'
    dot = dict(alpha=.5, s=np.pi * 50)

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(4, 4)
    fig.set_dpi(80)
    fig.set_facecolor('#898f99')

    ax.set_facecolor('#898f99')
    ax.tick_params(axis='x', colors='white')
    # The original repeated axis='x' here; the y axis was evidently meant.
    ax.tick_params(axis='y', colors='white')

    if title is not None:
        heading = fig.suptitle(title, fontsize=20)
        heading.set_color('white')
        heading.set_weight('bold')
        heading.set_horizontalalignment('center')

    if call_option is None:
        # Result code 2 is plotted as '스트라이크', code 1 as '볼'.
        svalues = values[np.where(values[:, 2] == 2)[0]]
        bvalues = values[np.where(values[:, 2] == 1)[0]]
        if print_std is True:
            _standardize_heights(svalues)
            _standardize_heights(bvalues)

        if specify_count <= 0:
            plt.scatter(svalues[:, 0],
                        svalues[:, 1],
                        color=red,
                        label='스트라이크',
                        **dot)
            if no_color is True:
                plt.scatter(bvalues[:, 0], bvalues[:, 1], color=red, **dot)
            else:
                plt.scatter(bvalues[:, 0],
                            bvalues[:, 1],
                            color=blue,
                            label='볼',
                            **dot)
        else:
            smiscs = miscs[np.where(values[:, 2] == 2)[0]]
            bmiscs = miscs[np.where(values[:, 2] == 1)[0]]

            _scatter_by_count(svalues, smiscs, specify_count)
            for r, m in zip(svalues, smiscs):
                if m[1] == specify_count and _in_view(r, view):
                    _put_text(ax, r, str(specify_count))

            _scatter_by_count(bvalues, bmiscs, specify_count)
            for r, m in zip(bvalues, bmiscs):
                if m[1] == specify_count and _in_view(r, view):
                    # NOTE(review): this branch writes int(r[4]) (the sb
                    # column) while every other branch writes specify_count —
                    # kept as is; confirm which one is intended.
                    _put_text(ax, r, str(int(r[4])))
    elif isinstance(call_option, list):
        codes = [Results[co].value for co in call_option]

        tvalues = None
        for c in codes:
            part = values[np.where(values[:, 2] == c)[0]]
            tvalues = part if tvalues is None else np.vstack((tvalues, part))
        # Standardise once, after all rows are collected. Doing it inside the
        # loop re-scaled the rows of earlier options on every later pass.
        if print_std is True and tvalues is not None:
            _standardize_heights(tvalues)

        if specify_count <= 0:
            for co, c in zip(call_option, codes):
                chosen = np.where(tvalues[:, 2] == c)
                if no_color is True:
                    plt.scatter(tvalues[chosen, 0],
                                tvalues[chosen, 1],
                                color=red,
                                **dot)
                else:
                    plt.scatter(tvalues[chosen, 0],
                                tvalues[chosen, 1],
                                color=Colors[c],
                                label=co,
                                **dot)
        else:
            tmiscs = None
            for c in codes:
                part = miscs[np.where(values[:, 2] == c)[0]]
                tmiscs = part if tmiscs is None else np.vstack((tmiscs, part))

            _scatter_by_count(tvalues, tmiscs, specify_count)
            for r, m in zip(tvalues, tmiscs):
                if m[1] == specify_count and _in_view(r, view):
                    _put_text(ax, r, str(specify_count))
    elif isinstance(call_option, str):
        c = Results[call_option].value
        tvalues = values[np.where(values[:, 2] == c)[0]]
        if print_std is True:
            _standardize_heights(tvalues)

        if specify_count <= 0:
            plt.scatter(tvalues[:, 0],
                        tvalues[:, 1],
                        color=red if no_color is True else Colors[c],
                        label=call_option,
                        **dot)
        else:
            tmiscs = miscs[np.where(values[:, 2] == c)[0]]
            _scatter_by_count(tvalues, tmiscs, specify_count)
            for r, m in zip(tvalues, tmiscs):
                if m[1] == specify_count:
                    if _in_view(r, view):
                        _put_text(ax, r, str(specify_count))
                else:
                    # Non-matching rows are drawn a second time here, one by
                    # one, exactly as the original did.
                    plt.scatter(r[0], r[1], color=blue, **dot)
    else:
        print()
        print('ERROR: call option must be string/list')
        raise SystemExit(1)

    # Inner 3x3 grid: four vertical, then four horizontal lines.
    grid = dict(color='#f9f9ff', linestyle='-', lw=1)
    for gx in (ll, ll + (rl - ll) / 3, ll + (rl - ll) * 2 / 3, rl):
        plt.plot([gx, gx], [bl, tl], **grid)
    for gy in (bl, bl + (tl - bl) / 3, bl + (tl - bl) * 2 / 3, tl):
        plt.plot([ll, rl], [gy, gy], **grid)

    # Thin outer frame, 1/8 outside the grid on every side.
    frame = dict(color='#d0cfd3', linestyle='-', lw=0.5)
    plt.plot([oll, oll], [obl, otl], **frame)
    plt.plot([orl, orl], [obl, otl], **frame)
    plt.plot([oll, orl], [obl, obl], **frame)
    plt.plot([oll, orl], [otl, otl], **frame)

    plt.axis([lb, rb, bb, tb])

    plt.rcParams['axes.unicode_minus'] = False
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    plt.axis('off')
    ax.autoscale_view('tight')

    if (legends is True) and (no_color is False):
        plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=2)

    plt.show()
Example #12
0
def liste_polices():
    """Yield the family name of every font file reported by fontconfig.

    Font files for which a ``RuntimeError`` is raised are skipped.
    """
    for chemin in font_manager.get_fontconfig_fonts():
        try:
            proprietes = font_manager.FontProperties(fname=chemin)
            yield proprietes.get_name()
        except RuntimeError:
            continue
Example #13
0
# Exploratory plotting script. `WordCloud`, `tmp_data`, `visual` and
# `font_manager` are not defined in the visible source — presumably they come
# from earlier in the file.

# Build a word cloud from the frequency mapping `tmp_data` and display it.
wordcloud = WordCloud(font_path="C:/Windows/Fonts/Art.ttf",
                      relative_scaling=0.2,
                      background_color='white'
                      ).generate_from_frequencies(tmp_data)
# NOTE(review): matplotlib's figsize is in inches; (500, 500) looks far too
# large — confirm.
plt.figure(figsize=(500,500))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()

# NOTE(review): `plt` is already used above, before this import — presumably
# it was imported earlier as well; confirm.
import matplotlib as mpl
import matplotlib.pylab as plt

# Line plot of the '판매금액' column with a figure title.
plt.plot(visual['판매금액'])
plt.suptitle("즐")

# Both return values are discarded here.
mpl.matplotlib_fname()
font_manager.get_fontconfig_fonts()

# NOTE(review): `import matlab` binds only the name `matlab`; `fitdist` is not
# defined anywhere in the visible source, so the next line would raise
# NameError as written — confirm.
import matlab
a=fitdist(visual['판매금액'],'normal')

import numpy as np
from scipy.stats import norm
import matplotlib.mlab as mlab

# Plot the column against norm.pdf evaluated with loc=0, scale=2.
plt.plot(visual['판매금액'],norm.pdf(visual['판매금액'],0,2))
from sklearn.preprocessing import scale
# Standardise the column, then plot it against mlab.normpdf(…, 0, 1).
sca = scale(visual['판매금액'])
plt.figure(figsize=(300,500))
# NOTE(review): `mlab.normpdf` is, as far as I know, absent from recent
# matplotlib releases — confirm against the installed version.
plt.plot(sca,mlab.normpdf(sca,0,1),c="b",lw=5,ls="--",marker = "o",ms=15,mec="g",mew=5, mfc="r")
Example #14
0
    def mk_portfolio(self):
        """포트폴리오 만드는 함수, r1: ETF비율, r2 : 채권 비율"""
        capital = self.user_info[0] * 10000
        if self.user_info[7] == self.risk_list[0]:
            r1 = 1
            r2 = 0.67
        elif self.user_info[7] == self.risk_list[1]:
            r1 = 0.8
            r2 = 0.4
        elif self.user_info[7] == self.risk_list[2]:
            r1 = 0.6
            r2 = 0.3
        elif self.user_info[7] == self.risk_list[3]:
            r1 = 0.4
            r2 = 0.1
        elif self.user_info[7] == self.risk_list[4]:
            r1 = 0.2
            r2 = 0

        if self.user_info[1] == self.term_list[0] or self.user_info[
                1] == self.term_list[1]:
            r2 = 0  # 투자 기간이 짧으면 채권 제외

        real_r0 = int((1 - r1) * 100)
        real_r1 = int((r1 - r2) * 100)
        real_r2 = int(r2 * 100)

        recommender = Recommender(self.path, self.stock_path, self.etf_path,
                                  self.user_info[5])

        recommender.cal_weight()
        rec_stock_lst = recommender.rec_stock()

        df = pd.read_csv(self.path + "/data/stock_list2.csv", encoding="cp949")
        names = [i[0] for i in rec_stock_lst]
        a = list(df[df["종목명"].isin(names)][["종목명", "가중치"]].sort_values(
            by="가중치", ascending=False).종목명.values)
        rec_stock_lst.sort(key=lambda x: a.index(x[0]))
        # print(rec_stock_lst)

        # 중복의 경우 처리필요

        res_etf1, res_etf2 = recommender.rec_etf()
        print("\n\n고객님의 포트폴리오입니다.\n")

        주식리스트 = []
        채권리스트 = []
        일반리스트 = []

        주식별금액리스트 = []
        채권별금액리스트 = []
        일반별금액리스트 = []

        self.portfolios1, penny1 = self.dist(capital, rec_stock_lst, 1 - (r1),
                                             10)
        print("\n주식 종목 : {}원\n".format(capital * (1 - r1) - penny1))
        for name, info in self.portfolios1.items():
            print("{}, {}개 매입. {} 전략. 현재가: {}".format(name, info[0],
                                                      info[1][1], info[1][0]))
            주식리스트.append(name)
            주식별금액리스트.append(info[1])

        self.portfolios2, penny2 = self.dist(capital + penny1, res_etf1, r2, 5)
        print("\n채권 ETF 종목 : {}원\n".format((capital + penny1) * r2 - penny2))
        for name, info in self.portfolios2.items():
            print("{}, {}개 매입.기간 내 보유 권장. 현재가: {}".format(
                name, info[0], info[1][0]))
            채권리스트.append(name)
            채권별금액리스트.append(info[1])

        self.portfolios3, penny3 = self.dist(capital + penny2, res_etf2,
                                             r1 - r2, 5)
        print("\n일반 ETF 종목 : {}원\n".format((capital + penny2) * (r1 - r2) -
                                           penny3))
        for name, info in self.portfolios3.items():
            print("{}, {}개 매입. 20일 후 리밸런싱 권장. 현재가: {}".format(
                name, info[0], info[1][0]))
            일반리스트.append(name)
            채권별금액리스트.append(info[1])

        # 포트폴리오 1번 보여주기
        self.portfolio_viz()

        ## 포트폴리오 상세정보
        주식금액 = capital * (1 - r1) - penny1
        채권금액 = (capital + penny1) * r2 - penny2
        일반금액 = (capital + penny2) * (r1 - r2) - penny3

        # 막대 그래프 생성
        kindx = ["주식", "일반 ETF", "채권 ETF"]
        values = [주식금액, 일반금액, 채권금액]
        colors = ["silver", "gold", "lightgray"]

        fm.get_fontconfig_fonts()
        font_name = fm.FontProperties(fname=self.fontpath).get_name()
        plt.rc("font", family=font_name, size=20)

        fig = plt.figure(figsize=(7, 7))
        plt.bar(kindx, values, width=0.6, color=colors, edgecolor="lightgray")

        plt.savefig(self.path + "/red/interface/image/portfolio/bar_chart.png")
        plt.close()

        # 경로별 이미지 불러오기
        im_tend = Image.open(self.path +
                             "/red/interface/image/portfolio/red_3.png")
        im_chart = Image.open(self.path +
                              "/red/interface/image/portfolio/bar_chart.png")
        font = ImageFont.truetype(self.fontpath, 24)

        # 칼라 설정
        b, g, r, a = 0, 0, 0, 0

        # 이미지에 텍스트 삽입
        draw = ImageDraw.Draw(im_tend)

        if real_r0 == 80:  # 80 : 20 : 00
            try:
                draw.text((635, 120),
                          str(주식리스트[0]),
                          font=font,
                          fill=(b, g, r, a))
            except:
                draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            try:
                draw.text((635, 164.333),
                          str(주식리스트[1]),
                          font=font,
                          fill=(b, g, r, a))
            except:
                draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            try:
                draw.text((635, 208.666),
                          str(주식리스트[2]),
                          font=font,
                          fill=(b, g, r, a))
            except:
                draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            try:
                draw.text((635, 253),
                          str(주식리스트[3]),
                          font=font,
                          fill=(b, g, r, a))
            except:
                draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            try:
                draw.text((635, 297.333),
                          str(주식리스트[4]),
                          font=font,
                          fill=(b, g, r, a))
            except:
                draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            try:
                draw.text((635, 341.666),
                          str(일반리스트[0]),
                          font=font,
                          fill=(b, g, r, a))
            except:
                draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            try:
                draw.text((635, 386),
                          str(일반리스트[1]),
                          font=font,
                          fill=(b, g, r, a))
            except:
                draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            try:
                draw.text((805, 430.333), "···", font=font, fill=(b, g, r, a))
            except:
                draw.text((0, 0), "", font=font, fill=(b, g, r, a))
        elif real_r0 == 60:  # 60 : 30 : 10
            if real_r2 == 0:
                try:
                    draw.text((635, 120),
                              str(주식리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 164.333),
                              str(주식리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 208.666),
                              str(주식리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 253),
                              str(주식리스트[3]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 297.333),
                              str(일반리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 341.666),
                              str(일반리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 386),
                              str(일반리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((805, 430.333),
                              "···",
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            else:
                try:
                    draw.text((635, 120),
                              str(주식리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 164.333),
                              str(주식리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 208.666),
                              str(주식리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 253),
                              str(주식리스트[3]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 297.333),
                              str(채권리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 341.666),
                              str(채권리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 386),
                              str(일반리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((805, 430.333),
                              "···",
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
        elif real_r0 == 40:  # 40 : 30 : 30
            if real_r2 == 0:
                try:
                    draw.text((635, 120),
                              str(주식리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 164.333),
                              str(주식리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 208.666),
                              str(주식리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 253),
                              str(일반리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 297.333),
                              str(일반리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 341.666),
                              str(일반리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 386),
                              str(일반리스트[3]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((805, 430.333),
                              "···",
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            else:
                try:
                    draw.text((635, 120),
                              str(주식리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 164.333),
                              str(주식리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 208.666),
                              str(주식리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 253),
                              str(채권리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 297.333),
                              str(채권리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 341.666),
                              str(일반리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 386),
                              str(일반리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((805, 430.333),
                              "···",
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
        elif real_r0 == 19:  # 19 : 40 : 40
            if real_r2 == 0:
                try:
                    draw.text((635, 120),
                              str(주식리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 164.333),
                              str(주식리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 208.666),
                              str(일반리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 253),
                              str(일반리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 297.333),
                              str(일반리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 341.666),
                              str(일반리스트[3]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 386),
                              str(일반리스트[4]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((805, 430.333),
                              "···",
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            else:
                try:
                    draw.text((635, 120),
                              str(주식리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 164.333),
                              str(주식리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 208.666),
                              str(채권리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 253),
                              str(채권리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 297.333),
                              str(채권리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 341.666),
                              str(일반리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 386),
                              str(일반리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((805, 430.333),
                              "···",
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
        elif real_r0 == 0:  # 0 : 33 : 67
            if real_r2 == 0:
                try:
                    draw.text((635, 120),
                              str(일반리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 164.333),
                              str(일반리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 208.666),
                              str(일반리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 253),
                              str(일반리스트[3]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 297.333),
                              str(일반리스트[4]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
            else:
                try:
                    draw.text((635, 120),
                              str(채권리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 164.333),
                              str(채권리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 208.666),
                              str(채권리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 253),
                              str(일반리스트[0]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 297.333),
                              str(일반리스트[1]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 341.666),
                              str(일반리스트[2]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((635, 386),
                              str(일반리스트[3]),
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))
                try:
                    draw.text((805, 430.333),
                              "···",
                              font=font,
                              fill=(b, g, r, a))
                except:
                    draw.text((0, 0), "", font=font, fill=(b, g, r, a))

        # 이미지에 파이차트 삽입
        im_tend.paste(im_chart, (30, 10))

        display(im_tend)

        # 마무리
        # portfolios4 = dict(portfolios1, **portfolios2)
        # portfolios4.update(portfolios3)
        return self.portfolios1, self.portfolios2, self.portfolios3
Example #15
0
def main(data, visulize):
    """Run the paragraph-level hierarchical attention model on an Excel file.

    The sheet at ``data`` is loaded, consecutive date lines are merged, the
    rows are grouped into documents (a new document starts at every line that
    ``title_catcher`` flags), and each document is encoded and fed to a
    hierarchical attention network whose weights are read from
    ``model/para/model_30.h5``.

    Side effects:
        * writes ``output/paragraph/output_par_dis/paragraph_distribution.xlsx``
          containing the lines of each document with their sentence-level
          attention weights;
        * when ``visulize == 'on'``, writes one attention heat-map PNG per
          document to ``./output/paragraph/output_par_vis/``.

    Args:
        data: Path of the Excel file to analyse (passed to ``pd.read_excel``).
            The sheet must have 7 or 9 columns.
        visulize: ``'on'`` to render the heat-maps; any other value skips
            them. (The spelling is kept for backward compatibility.)

    Returns:
        A ``(paragraph_prob, num2label)`` tuple: the list of per-document
        output vectors produced by ``model.predict`` and the dict that is the
        inverse of the mapping read from ``./data/index_par_label.xlsx``.

    Raises:
        ValueError: If the sheet has neither 7 nor 9 columns.
    """

    import tensorflow as tf
    import warnings
    import os
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    warnings.filterwarnings("ignore")
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    import time
    import re
    import pickle
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import matplotlib.font_manager as fm
    import seaborn as sns

    from sklearn.model_selection import train_test_split
    from preprocessing.preprocessing_code_190418 import preprocess, title_catcher, date_process, phone_process, time_process, title_process
    from konlpy.tag import Komoran

    import keras
    from keras import backend as K
    from keras.layers import Input, Embedding, Bidirectional, CuDNNLSTM, BatchNormalization
    from keras.layers import RepeatVector, Permute, Multiply, Lambda, TimeDistributed
    from keras.layers import Dense, Flatten
    from keras.models import Model, Sequential, model_from_json
    from keras.callbacks import ModelCheckpoint
    from keras.optimizers import Adam
    from keras.engine.topology import Layer
    from keras.preprocessing.sequence import pad_sequences

    print("Analyzing Paragraph")

    def load_dataset(data):
        """Read the Excel sheet and attach the fixed column names."""
        origin_data = pd.read_excel(data)
        column_names = [
            'doc_id', 'par_id', 'art_id', 'line_id', 'text', 'par_label',
            'line_label'
        ]
        column_count = len(origin_data.columns)
        if column_count == 9:
            column_names = column_names + ['none1', 'none2']
        elif column_count != 7:
            raise ValueError("Columns is not 7 or 9!")
        origin_data.columns = column_names
        origin_data['split_id'] = origin_data['doc_id'].map(
            str) + '_' + origin_data['par_label']
        return origin_data

    def join_date(original_data):
        """Merge runs of short date lines labelled 'PR-04-13' into one row.

        Rows whose text contains exactly one date-like match, carries the
        label 'PR-04-13' and is at most 15 characters long are collected.
        Neighbouring hits (row distance < 3) form a run; the text of a run is
        joined into its first row and the remaining rows are dropped.  If the
        merge fails for any reason the input frame is returned instead.
        """
        p = re.compile(
            '[0-9]{4}[ .년]{0,3}[0-9]{1,2}[ .월]{0,3}[0-9]{1,2}[ .일]{0,3}')
        split_date_idx = [
            idx for idx, [lines, labels] in enumerate(original_data[
                ['text', 'line_label']].values)
            if len(p.findall(str(lines))) == 1 and labels == 'PR-04-13'
            and len(lines) <= 15
        ]
        # Positions in split_date_idx after which the next hit is >= 3 rows
        # away, i.e. the last element of each run.
        date_diff = [
            i for i in range(len(split_date_idx) - 1)
            if split_date_idx[i + 1] - split_date_idx[i] >= 3
        ]

        # NOTE(review): the run that follows the last gap is never added to
        # seq_date_idx, and with no gap at all date_diff[0] raises IndexError
        # so nothing is merged.  Left untouched because changing it would
        # alter the model input -- confirm against the training-time
        # preprocessing before fixing.
        try:
            seq_date_idx = []
            seq_date_idx.append(split_date_idx[0:date_diff[0] + 1])
            for i in range(len(date_diff) - 1):
                seq_date_idx.append(split_date_idx[date_diff[i] +
                                                   1:date_diff[i + 1] + 1])
            for j in seq_date_idx:
                original_data.iloc[j[0], 4] = ' '.join(
                    original_data.iloc[j]['text'].values)
            processed_data = original_data.drop(
                np.concatenate([i[1:] for i in seq_date_idx]))
        except Exception:
            # Best effort: fall back to the input frame, but no longer
            # swallow KeyboardInterrupt / SystemExit like a bare except did.
            processed_data = original_data
        return processed_data

    def document_label_dataset(processed_data):
        """Group the rows into documents, splitting at every title line."""
        processed_data = processed_data.reset_index()

        temp = []
        for text in processed_data['text']:
            try:
                result = title_catcher(text)
                temp.append(result)
            except Exception:
                # A line the title detector cannot handle is not a title.
                temp.append(False)
        processed_data['title'] = temp

        # Element-wise comparison on purpose: 'title' is a pandas column.
        start_idx = processed_data[processed_data['title'] ==
                                   True].index.tolist()
        # Every document ends where the next one starts; the last one ends
        # after the final row.
        end_idx = start_idx[1:]
        end_idx.append(processed_data.index[-1] + 1)

        contract = []
        for start, end in zip(start_idx, end_idx):
            temp = processed_data['text'][start:end]
            contract.append(list(temp.values))

        new_df = pd.DataFrame({"doc": contract}).reset_index()
        return new_df

    def split_newdataset(data, standard, seed):
        """Collect the 'doc' entries per unique value of column ``standard``.

        ``seed`` is accepted but not used.
        """
        contract_names = np.unique(data[standard])
        x_all = []

        for name in contract_names:
            temp = data[data[standard] == name]
            x_all.append(list(temp['doc'].values))

        return x_all

    def make_paragraph_x_dataset(x):
        """Return the first entry of every group in ``x``."""
        return [paragraph[0] for paragraph in x]

    def make_paragraph_y_dataset(y):
        """Split the first entry of every group in ``y`` on commas."""
        return [paragraph[0].split(',') for paragraph in y]

    def text_preprocess(text):
        """Normalise one line and return its morphemes joined by spaces."""
        text = preprocess(text)
        text = title_process(text)
        text = time_process(text)
        text = date_process(text)
        text = phone_process(text)
        text = re.sub('[^가-힣".,()~%_ ]+', '', text)
        try:
            text = ' '.join(np.array(komoran.pos(text))[:, 0])
        except Exception:
            # Tagging failed (e.g. nothing left to tag): use the placeholder.
            text = '_빈칸_'
        return text

    def word2idx(text):
        """Map one token to its vocabulary id; unknown tokens map to 1."""
        try:
            re_text = re.sub('[^가-힣".,()~%_ ]+', '', text)
            re_text = re.sub('[^가-힣_]+', 'PUNC', re_text)
            return vocab_to_int[re_text]
        except Exception:
            return 1

    def sentence2idx(sentence):
        """Encode a sentence, skipping tokens that contain bare jamo."""
        p = re.compile('([ㄱ-ㅎㅏ-ㅣ]+)')
        return [
            word2idx(word) for word in sentence.split()
            if len(p.findall(word)) == 0
        ]

    def contract2idx(contract, max_len):
        """Encode every line of a document and pad each to ``max_len``."""
        temp = [sentence2idx(text_preprocess(line)) for line in contract]
        return pad_sequences(temp, maxlen=max_len)

    def x_dataset(contracts, max_row, max_len):
        """Encode all documents and pad each to ``max_row`` lines.

        Short documents are padded at the end.  ``truncating`` is left at the
        Keras default ('pre'), so documents longer than ``max_row`` keep
        their LAST ``max_row`` lines.
        """
        contracts = [contract2idx(contract, max_len) for contract in contracts]
        return pad_sequences(contracts, maxlen=max_row, padding='post')

    def y_dataset(labels):
        """Build a multi-hot vector from the labels known to ``label2num``."""
        output = np.zeros(class_size)
        for label in labels:
            if label in label2num:
                output += np.eye(class_size)[label2num[label]]
        return output

    class AttentionLayer(Layer):
        """Additive attention that returns one softmax weight per time step."""

        def __init__(self, attention_dim=100, **kwargs):
            self.attention_dim = attention_dim
            super(AttentionLayer, self).__init__(**kwargs)

        def build(self, input_shape):
            self.W = self.add_weight(name='Attention_Weight',
                                     shape=(input_shape[-1],
                                            self.attention_dim),
                                     initializer='random_normal',
                                     trainable=True)
            self.b = self.add_weight(name='Attention_Bias',
                                     shape=(self.attention_dim, ),
                                     initializer='random_normal',
                                     trainable=True)
            self.u = self.add_weight(name='Attention_Context_Vector',
                                     shape=(self.attention_dim, 1),
                                     initializer='random_normal',
                                     trainable=True)
            super(AttentionLayer, self).build(input_shape)

        def call(self, x):
            # tanh projection, score against the context vector, then
            # normalise the scores with a softmax.
            u_it = K.tanh(K.dot(x, self.W) + self.b)
            a_it = K.dot(u_it, self.u)
            a_it = K.squeeze(a_it, -1)
            a_it = K.softmax(a_it)
            return a_it

        def compute_output_shape(self, input_shape):
            return (input_shape[0], input_shape[1])

    def WeightedSum(attentions, representations):
        """Sum ``representations`` over axis 1, weighted by ``attentions``."""
        repeated_attentions = RepeatVector(
            K.int_shape(representations)[-1])(attentions)
        repeated_attentions = Permute([2, 1])(repeated_attentions)
        aggregated_representation = Multiply()(
            [representations, repeated_attentions])
        aggregated_representation = Lambda(lambda x: K.sum(x, axis=1))(
            aggregated_representation)
        return aggregated_representation

    def SenWeightedSum(attentions, representations):
        """Sentence-level weighted sum; same computation as WeightedSum."""
        return WeightedSum(attentions, representations)

    def Hie_Attention():
        """Build the compiled classifier and its sentence-attention extractor.

        Returns:
            ``(model, attention_extractor)``; both take a
            ``(max_row, max_len)`` document as input.
        """
        embedding_layer = Embedding(input_dim=max_nb_words,
                                    output_dim=embedding_dim,
                                    input_length=max_len,
                                    trainable=True,
                                    mask_zero=False)

        # Sentence Encoder
        sentence_input = Input(shape=(max_len, ), name='sentence_input')
        embedded_sentence = embedding_layer(sentence_input)
        contextualized_sentence = Bidirectional(
            CuDNNLSTM(lstm_dim, return_sequences=True),
            name="WORD_BiLSTM")(embedded_sentence)
        word_attention = AttentionLayer(attention_dim)(contextualized_sentence)
        sentence_representation = WeightedSum(word_attention,
                                              contextualized_sentence)
        sentence_encoder = Model(inputs=[sentence_input],
                                 outputs=[sentence_representation])

        # Document Encoder
        document_input = Input(shape=(
            max_row,
            max_len,
        ),
                               name='document_input')
        embedded_document = TimeDistributed(sentence_encoder)(document_input)
        contextualized_document = Bidirectional(
            CuDNNLSTM(lstm_dim, return_sequences=True),
            name="SENTENCE_BiLSTM")(embedded_document)

        sentence_attention = AttentionLayer(attention_dim)(
            contextualized_document)
        document_representation = SenWeightedSum(sentence_attention,
                                                 contextualized_document)
        layer = Dense(dense_size, activation='relu')(document_representation)
        output = Dense(class_size, activation='sigmoid')(layer)
        model = Model(inputs=[document_input], outputs=[output])

        # Attention Extractor
        word_attention_extractor = Model(inputs=[sentence_input],
                                         outputs=[word_attention])
        # NOTE(review): word_attentions is built but never returned or used
        # below; kept so the constructed graph is unchanged.
        word_attentions = TimeDistributed(word_attention_extractor)(
            document_input)
        attention_extractor = Model(inputs=[document_input],
                                    outputs=[sentence_attention])
        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(learning_rate),
                      metrics=['accuracy'])
        return model, attention_extractor

    def vecs2labels(vecs):
        """Return the labels whose position in ``vecs`` holds a 1."""
        output = []
        for i, vec in enumerate(vecs):
            if vec == 1:
                output.append(num2label[i])
        return output

    def model_output(output_):
        """Threshold raw output vectors and convert them to label lists."""
        return [[vecs2labels((output > threshold) * 1)] for output in output_]

    def model_pred(model, input_):
        """Predict document by document and return thresholded label lists."""
        out = []
        for contract in input_:
            out.extend(model.predict(np.array([contract])))
        return [[vecs2labels((output > threshold) * 1)] for output in out]

    def model_probability(model, input_):
        """Predict document by document and return the raw output vectors."""
        out = []
        for contract in input_:
            out.extend(model.predict(np.array([contract])))
        return out

    def multilabel_evaluate(class_pred, output, original_x):
        """Mean of per-document scores over ``original_x``.

        A document scores 1 when the label set taken from ``output`` is a
        non-empty subset of the one taken from ``class_pred``, or when both
        sets are empty; otherwise 0.
        """
        accuracy = []
        class_pred = [labels[0] for labels in class_pred]
        output = [labels[0] for labels in output]
        for i, contract in enumerate(original_x):
            ans = set(class_pred[i])
            pred = set(output[i])

            if (pred <= ans and len(pred) > 0) or (len(pred) == 0
                                                   and len(ans) == 0):
                score = 1
            else:
                score = 0
            accuracy.append(score)
        return np.mean(accuracy)

    # Use the bundled font for all plots.
    fm.get_fontconfig_fonts()
    font_location = './font/H2GTRE.TTF'
    font_name = fm.FontProperties(fname=font_location).get_name()
    plt.rc('font', family=font_name)
    plt.rcParams.update({'figure.max_open_warning': 0})

    print("Load Data directory:", data)
    original_data = load_dataset(data)
    processed_data = join_date(original_data)
    new_df = document_label_dataset(processed_data)
    x_all = split_newdataset(new_df, 'index', 1103)
    x_all = make_paragraph_x_dataset(x_all)

    # Mapping between labels and class indices, read from the index sheet
    # (first column dropped, then column 0 -> column 1).
    para_index = pd.read_excel('./data/index_par_label.xlsx')
    para_index = para_index.iloc[:, 1:]
    para_dict = {}
    for i in para_index.values:
        para_dict[i[0]] = i[1]
    label2num = para_dict
    num2label = {word: i for i, word in label2num.items()}

    class_size = len(num2label)

    max_row = 100
    max_len = 200

    komoran = Komoran(userdic='preprocessing/userdict_190411.txt')

    with open('./preprocessing/para_int_to_vocab.pickle', 'rb') as f:
        int_to_vocab = pickle.load(f)

    with open('./preprocessing/para_vocab_to_int.pickle', 'rb') as f:
        vocab_to_int = pickle.load(f)

    x_all_ = x_dataset(x_all, max_row, max_len)

    max_nb_words = len(int_to_vocab) + 1
    embedding_dim = 200
    attention_dim = 100
    lstm_dim = 100
    learning_rate = 0.0001
    dense_size = 256

    print("Load Pragraph Model")
    model, attention_extractor = Hie_Attention()
    model.load_weights("model/para/model_30.h5")
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(learning_rate),
                  metrics=['accuracy'])

    attention_distribution = attention_extractor.predict(x_all_)

    # Pair every document with the attention weights of the lines the model
    # actually saw.  x_dataset pads short documents at the end and, for
    # documents longer than max_row, keeps the LAST max_row lines, so the
    # same tail has to be reported here (the head used to be reported, which
    # mis-aligned lines and weights for long documents).
    sentence_value, attention_value = [], []
    for sentence, attention in zip(x_all, attention_distribution):
        kept_lines = sentence[-max_row:]
        sentence_value.append(kept_lines)
        attention_value.append(attention[:len(kept_lines)])

    if visulize == 'on':
        print('paragraph visual file extracting...')
        vis_dir = "./output/paragraph/output_par_vis/"
        # savefig does not create missing directories.
        os.makedirs(vis_dir, exist_ok=True)
        for idx_1, sentence_attention in enumerate(
                zip(sentence_value, attention_value)):
            sentence = sentence_attention[0]
            attention = sentence_attention[1]
            tmp1 = np.array(attention).reshape(-1, 1)
            tmp2 = np.array(sentence).reshape(-1, 1)

            # One heat-map row per line of the document.
            fig, ax = plt.subplots(figsize=(10, len(tmp1)))
            ax.tick_params(labelsize=20)
            ax = sns.heatmap(data=tmp1,
                             yticklabels=tmp2,
                             annot=True,
                             cmap="Reds",
                             annot_kws={"size": 30},
                             cbar=False)
            plt.savefig(vis_dir +
                        "paragraph_attention_sequence_" + str(idx_1 + 1) +
                        ".png",
                        bbox_inches="tight")
            plt.close(fig)
        print('Check the /output/paragraph/output_par_dis, output_par_vis',
              end='\n\n')
    else:
        print('skip visualize', end='\n\n')

    df_result = pd.DataFrame({
        "Sentence": sentence_value,
        "Attention": attention_value
    })

    dis_dir = 'output/paragraph/output_par_dis/'
    os.makedirs(dis_dir, exist_ok=True)
    # The `encoding` keyword is no longer passed: pandas documents it as
    # unused and removed it in 2.0.
    df_result.to_excel(dis_dir + 'paragraph_distribution.xlsx')  # Data to Excel
    paragraph_prob = model_probability(model, x_all_)
    return paragraph_prob, num2label
Example #16
0
def test_get_fontconfig_fonts():
    """Check that more than one fontconfig font is reported, except on win32."""
    # The font count is only evaluated when the platform is not win32.
    if sys.platform != 'win32':
        assert len(get_fontconfig_fonts()) > 1
Example #17
0
File: f2.py Project: jpcoles/ZM
# Matplotlib configuration: white-on-black axes styling applied through rc().
# NOTE: this snippet uses Python 2 print statements.
import matplotlib as mpl
from matplotlib import rc
from operator import add

import matplotlib.font_manager as fm
# Print the list returned by fontconfig font discovery.
print fm.get_fontconfig_fonts()

# Set the 'toolbar' rcParam to the string 'None'.
mpl.rcParams['toolbar'] = 'None'
# Print every rcParams key whose name starts with 'font'.
print filter(lambda x: x.startswith('font'), mpl.rcParams.keys())
# Earlier font experiments, kept commented out by the author.
#rc('font',**{'name': 'Univers LT STd', 'size':20})
#rc('font',**{'family':'serif','serif':['Computer Modern Roman']})
#rc('text', usetex=True)
rc('font', size=20)
#rc('font', **{'family':'UniversLTStd-Cn'})
#rc('font', **{'family':'Univers LT Std 57 Cn', 'serif':'57 Cn', 'sans-serif':'57 Cn'})
rc('font', **{'family':'Univers LT Std 57 Cn'})
# White text, grid, axis edges, labels and ticks on a black axes background.
rc('text', color='w')
rc('grid', color='w')
rc('axes', edgecolor='w')
rc('axes', facecolor='k')
rc('axes', labelcolor='w')
rc('axes', linewidth=2)
rc('ytick', color='w')
rc('xtick', color='w')
# Padding for major ticks on both axes.
rc('xtick.major', pad=10)
rc('ytick.major', pad=10)
# NOTE(review): sets the 'text.dvipnghack' rcParam -- confirm the installed
# matplotlib version still accepts it.
rc('text', dvipnghack=True)

# Identity function bound to `_` (presumably a gettext-style placeholder -- TODO confirm).
_ = lambda x: x

import wxversion
def main(prob_pkl,num2label,data,visualize):
    """Run line-level (sentence) inference on a contract dataset and export the results.

    The function loads the dataset referenced by ``data``, rebuilds the
    ``TabSen`` Keras model, loads its weights from
    './model/sentence/33-0.1379.hdf5', predicts one class out of
    ``valid_class`` for every encoded input row, optionally writes attention
    heatmaps and per-sentence Excel files, and finally writes
    './output/contract_tagging.xlsx'.

    Args:
        prob_pkl: Per-paragraph probabilities. Indexed as ``prob_pkl[num]`` in
            parallel with the paragraphs returned by ``split_ptl_inference``
            and thresholded at 0.5 -- presumably the output of the paragraph
            model; TODO confirm shape against the caller.
        num2label: Mapping from a paragraph class index to its label text
            (used as ``para_dict[j]``).
        data: Value passed straight to ``load_dataset``.
        visualize: 'on' writes the attention heatmaps/Excel files, 'off'
            skips them; any other value only prints a warning.

    Returns:
        None. Results are written to disk and progress is printed.
    """

    import numpy as np
    import pandas as pd
    import sys
    # Make the parent directory importable so the `preprocessing` package resolves.
    sys.path.append('..')
    from preprocessing.input_data_index_embedding import load_dataset,split_dataset,text_preprocess,join_date,bow_vocab,load_bow_vocab,bow_label,max_length,x_data_set,y_data_set,int_to_label,labels_to_vecs,make_index_embed,evaluate,split_newdataset_sw,split_ptl_inference,document_label_dataset_training,document_label_dataset_infer,tagging_row_index,row_embed,vecs2labels
    from preprocessing.preprocessing_code_190418 import title_catcher, preprocess, date_process
    import pickle
    import re

    from sklearn.model_selection import train_test_split
    from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score
    from sklearn.metrics import classification_report
    from sklearn.preprocessing import OneHotEncoder

    from collections import Counter
    from konlpy.tag import Komoran

    import keras
    import matplotlib.pyplot as plt
    from keras.layers import Input, Embedding, Dense, LSTM, Bidirectional, Dropout, Concatenate, Flatten, Conv1D, Conv2D, GlobalMaxPooling1D, TimeDistributed, SpatialDropout1D, GRU, multiply, Lambda, Reshape, CuDNNGRU, CuDNNLSTM, Permute, RepeatVector, Multiply
    from keras.layers import MaxPool1D
    from keras.models import Model, Sequential
    from keras import backend as K
    from keras.callbacks import ModelCheckpoint
    from keras.optimizers import Adam
    from keras.engine.topology import Layer
    from keras.preprocessing.sequence import pad_sequences
    from keras import regularizers
    from keras.layers.normalization import BatchNormalization
    from keras.layers import Activation
    import seaborn as sn

    import tensorflow as tf
    import warnings
    import os
    # Silence TensorFlow / Python warnings so only the progress prints remain.
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    warnings.filterwarnings("ignore")
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    print("Analyzing Sentence")

    test_prob = prob_pkl
    data_dir = data
    vocab_to_int_dir = './preprocessing/insu_vocab_to_int.pkl'
    int_to_vocab_dir = './preprocessing/insu_int_to_vocab.pkl'

    origin_data = load_dataset(data_dir)
    origin_data = join_date(origin_data)
    # Leftover notebook preview; the result is discarded.
    origin_data.head(3)

    ptl_df = document_label_dataset_infer(origin_data)
    # Leftover notebook preview; the result is discarded.
    ptl_df.head(3)

    # NOTE(review): this helper is defined but not called anywhere in this function.
    def split_newdataset_sw_(data, standard):
        """Group the 'doc' values by the unique values of column `standard`.

        Returns, for every unique value, the first 'doc' entry of that group.
        """
        contract_names = np.unique(data[standard])

        x_all = []

        for name in contract_names:
            temp = data[data[standard] == name]
            temp_contract = []

            for c in temp['doc'].values:
                temp_contract.append(c)
            x_all.append(temp_contract)
        # Keep only the first document of each group.
        x_all = [x_all[con][0] for con in range(len(x_all))]

        return x_all

    # presumably one list of sentences per paragraph -- verify against split_ptl_inference
    x_all =  split_ptl_inference(ptl_df, 'index')

    # Line-level class labels; their count fixes the size of the softmax output.
    valid_class = np.array([
    '-', 'PR-02-01', 'PR-02-02', 'PR-02-03', 'PR-02-04', 'PR-02-05',
        'PR-02-06', 'PR-02-07', 'PR-02-08', 'PR-02-09', 'PR-02-10',
        'PR-02-11', 'PR-02-12', 'PR-02-13', 'PR-02-14', 'PR-02-15',
        'PR-02-16', 'PR-02-17', 'PR-02-18', 'PR-02-19', 'PR-02-20',
        'PR-02-21', 'PR-02-22', 'PR-02-24', 'PR-02-25', 'PR-02-26',
        'PR-02-27', 'PR-02-28', 'PR-02-29', 'PR-02-30', 'PR-02-31',
        'PR-02-32', 'PR-03-01', 'PR-03-02', 'PR-03-03', 'PR-03-04',
        'PR-04-01', 'PR-04-02', 'PR-04-03', 'PR-04-04', 'PR-04-05',
        'PR-04-06', 'PR-04-07', 'PR-04-08', 'PR-04-09', 'PR-04-10',
        'PR-04-11', 'PR-04-12', 'PR-04-13', 'PR-04-14', 'PR-04-15',
        'PR-04-16', 'PR-04-17', 'PR-04-18', 'PR-04-19', 'PR-04-20',
        'PR-04-21', 'PR-04-22', 'PR-04-23', 'PR-04-24', 'PR-04-25',
        'PR-04-26', 'PR-04-27', 'PR-04-28', 'PR-04-29', 'PR-04-30',
        'PR-04-31', 'PR-04-32', 'PR-04-33', 'PR-04-34', 'PR-04-35',
        'PR-04-36', 'PR-04-37', 'PR-04-38', 'PR-04-39', 'PR-04-40',
        'PR-04-41', 'PR-04-42', 'PR-04-43', 'PR-04-44', 'PR-04-45',
        'PR-04-46', 'PR-04-47', 'PR-04-48', 'PR-04-49', 'PR-04-50',
        'PR-04-51', 'PR-04-52', 'PR-05-01', 'PR-05-02', 'PR-05-03',
        'PR-05-04', 'PR-05-05', 'PR-05-06', 'PR-05-07', 'PR-05-08',
        'PR-05-09', 'PR-05-10', 'PR-05-11', 'PR-05-12', 'PR-05-13',
        'PR-05-14', 'PR-05-15', 'PR-05-16', 'PR-05-17', 'PR-05-18',
        'PR-05-19', 'PR-05-20', 'PR-05-21', 'PR-06-01', 'PR-06-02',
        'PR-06-03', 'PR-06-04', 'PR-06-05', 'PR-06-06', 'PR-06-07',
        'PR-06-08', 'PR-06-09', 'PR-06-10', 'PR-06-11', 'PR-06-12',
        'PR-06-13', 'PR-06-14', 'PR-07-01', 'PR-07-02', 'PR-07-03',
        'PR-07-04', 'PR-07-05', 'PR-07-06', 'PR-07-07', 'PR-08-01',
        'PR-08-02', 'PR-08-03', 'PR-08-04', 'PR-08-05', 'PR-08-06',
        'PR-08-07', 'PR-08-08', 'PR-08-09', 'PR-08-10', 'PR-08-11',
        'PR-08-12', 'PR-08-13', 'PR-08-14', 'PR-08-15', 'PR-08-16',
        'PR-08-17', 'PR-08-18', 'PR-08-19', 'PR-08-20', 'PR-08-21',
        'PR-08-22', 'PR-08-23', 'PR-08-24', 'PR-08-25', 'PR-08-26',
        'PR-08-27', 'PR-08-28', 'PR-08-29', 'PR-09-01', 'PR-09-02',
        'PR-09-03', 'PR-09-04', 'PR-09-05', 'PR-09-06', 'PR-09-07',
        'PR-09-08', 'PR-09-09', 'PR-09-10', 'PR-09-11', 'PR-09-12',
        'PR-09-13', 'PR-09-14', 'PR-09-15', 'PR-10-01', 'PR-10-02',
        'PR-10-03', 'PR-10-04', 'PR-10-05', 'PR-10-06', 'PR-11-01',
        'PR-11-02', 'PR-11-03', 'PR-11-04', 'PR-11-05', 'PR-11-07',
        'PR-11-08', 'PR-12-01', 'PR-13-01', 'PR-13-02', 'PR-13-03',
        'PR-14-01', 'PR-14-02', 'PR-14-03', 'PR-14-04', 'PR-14-05',
        'PR-14-06', 'PR-15-01', 'PR-16-01', 'PR-17-01', 'PR-18-01',
        'PR-19-01', 'PR-19-02', 'PR-20-01', 'PR-20-02', 'PR-20-03',
        'PR-20-04', 'PR-20-05', 'PR-20-06', 'PR-20-07', 'PR-20-08',
        'PR-20-09', 'PR-20-10', 'PR-20-11', 'PR-20-12', 'PR-20-13',
        'PR-20-14', 'PR-21-01', 'PR-21-02', 'PR-22-01', 'PR-23-01',
        'PR-23-02', 'PR-23-03', 'PR-23-04', 'PR-23-05', 'PR-23-06',
        'PR-24-01', 'PR-24-02', 'PR-24-03', 'PR-24-04', 'PR-24-05',
        'PR-24-06', 'PR-24-07', 'PR-24-08', 'PR-24-09', 'PR-24-10',
        'PR-25-01', 'PR-25-02', 'PR-25-03', 'PR-25-04', 'PR-26-01',
        'PR-27-01', 'PR-28-01', 'PR-29-01', 'PR-29-02', 'PR-29-03',
        'PR-29-04', 'PR-29-05', 'PR-29-06', 'PR-30-01'])

    class_size = len(valid_class)

    vocab_to_int, int_to_vocab = load_bow_vocab(vocab_to_int_dir, int_to_vocab_dir)

    # Sequence length passed to x_data_set and width of the row input.
    max_len = 350
    max_row = 121

    # Paragraph class names; their count is the width of the model's 'col_input'.
    unique_par_class = ['정의', '인수_및_모집', '본_사채의_발행조건', '수요예측', '모집관계사항', '불성실_수요예측_참여자의_관리', '발행회사의_보장', '기업실사',
                        '확약_또는_선약', '인수시기', '제서식의_작성_및_공고', '수수료', '비용', '원리금_상환사무의_대행', '인수_및_모집_일정의_변경', '사채권의_발행여부',
                        '채권상장신처_및_채권등록_발행', '특약사항', '사채금_사용용도', '원리금_지급의무', '책임부담', '해지_또는_해제', '통보_및_요청', '자료제출',
                        '평과결과_공시_등', '관할법원', '계약의_해석원칙_등', '공모금리_결정_및_배정', '개별책임']

    label_to_int = bow_label(unique_par_class)[0]

    # Repeat each paragraph's probability vector once per element of x_all[num],
    # so every sentence row carries its paragraph's probabilities.
    test_proba = np.concatenate([[test_prob[num] for cnt in x_all[num]] for num in range(len(x_all))])

    # Model inputs: row-position features and encoded word sequences
    # (exact encodings live in the preprocessing helpers).
    x_row = tagging_row_index(x_all)
    x_row_ = row_embed(x_row, max_row)
    x_all_ = x_data_set(x_all, max_len, vocab_to_int)

    # +2 presumably reserves ids 0/1 for 'pad'/'UNK' (set on int_to_vocab below) -- TODO confirm
    n_words = len(int_to_vocab) + 2
    embed_size = 100

    # Training hyper-parameters; only learning_rate is used below (in compile).
    batch_size = 8
    learning_rate = 0.0001
    epochs = 500

    sentence_wise_lstm_size = 128

    dense_dropout = 0.5
    l2_reg = regularizers.l2(0.0001)

    dense_size = 128
    attention_dim = 100
    rnn_dim = 256

    class AttentionLayer(Layer):
        """Keras layer that returns softmax attention scores for its input sequence."""

        def __init__(self, attention_dim, **kwargs):
            self.attention_dim = attention_dim
            super(AttentionLayer, self).__init__(**kwargs)

        def build(self, input_shape):
            # Projection weight, bias and context vector, all trainable.
            self.W = self.add_weight(name='Attention_Weight',
                                    shape=(input_shape[-1], self.attention_dim),
                                    initializer='random_normal',
                                    trainable=True)
            self.b = self.add_weight(name='Attention_Bias',
                                    shape=(self.attention_dim, ),
                                    initializer='random_normal',
                                    trainable=True)
            self.u = self.add_weight(name='Attention_Context_Vector',
                                    shape=(self.attention_dim, 1),
                                    initializer='random_normal',
                                    trainable=True)
            super(AttentionLayer, self).build(input_shape)

        def call(self, x):
            # u_it = tanh(x.W + b); score = u_it.u; the trailing size-1 axis is
            # squeezed away before the softmax.
            u_it = K.tanh(K.dot(x, self.W) + self.b)
            a_it = K.dot(u_it, self.u)
            a_it = K.squeeze(a_it, -1)
            a_it = K.softmax(a_it)
            return a_it

        def compute_output_shape(self, input_shape):
            # Output keeps the first two input dimensions.
            return (input_shape[0], input_shape[1])

    def WeightedSum(attentions, representations):
        """Weight `representations` by `attentions` and sum over axis 1."""
        # Repeat the attention scores once per feature of `representations`.
        repeated_attentions = RepeatVector(K.int_shape(representations)[-1])(attentions)

        # Swap the two non-batch axes so the scores line up with `representations`.
        repeated_attentions = Permute([2, 1])(repeated_attentions)

        aggregated_representation = Multiply()([representations, repeated_attentions])
        aggregated_representation = Lambda(lambda x: K.sum(x, axis=1))(aggregated_representation)

        return aggregated_representation

    # NOTE(review): not referenced anywhere else in this function.
    def SenWeightedSum(attentions, representations):
        """Same weighting as WeightedSum but without the final sum."""
        repeated_attentions = RepeatVector(K.int_shape(representations)[-1])(attentions)
        repeated_attentions = Permute([2, 1])(repeated_attentions)
        aggregated_representation = Multiply()([representations, repeated_attentions])
        return aggregated_representation

    def TabSen():
        """Build the classifier and a second model exposing its word attention.

        Returns:
            (model, word_attention_extractor): the compiled three-input
            classifier and a model mapping the word input to attention scores.
        """

        K.clear_session()
        np.random.seed(1201)

        row_embed = Input(shape = (max_row, ), name = 'row_input')
        col_embed = Input(shape = (len(unique_par_class), ), name = 'col_input')

        row_layer = Dense(128)(row_embed)
        col_layer = Dense(128)(col_embed)

        word_inp_embed = Input(shape = (None, ), name = 'word_input')
        word_embed = Embedding(n_words, embed_size, trainable = True)(word_inp_embed)

        lstm = Bidirectional(CuDNNLSTM(sentence_wise_lstm_size, return_sequences=True))(word_embed)
        lstm_bn = BatchNormalization()(lstm)

        attn_score = AttentionLayer(attention_dim)(lstm_bn)
        attn_out = WeightedSum(attn_score, lstm_bn)

        # Join the attended sentence vector with the row and paragraph features.
        concat = Concatenate()([attn_out, row_layer, col_layer])

        fc_layer = Dense(dense_size,
                    activation='relu',
                    kernel_regularizer = keras.regularizers.l2(1e-5),
                    bias_regularizer = keras.regularizers.l1(1e-3))(concat)
        dropout = Dropout(dense_dropout)(fc_layer)
        output = Dense(class_size, activation = 'softmax')(dropout)

        model = Model(inputs = [word_inp_embed, row_embed, col_embed], outputs = output)

        # Shares layers with `model`; used only to read the attention scores.
        word_attention_extractor = Model(inputs=[word_inp_embed],
                                        outputs=[attn_score])

        model.compile(loss = 'categorical_crossentropy', optimizer = Adam(learning_rate), metrics = ['accuracy'])
        return model, word_attention_extractor

    print("Load Sentence Model")
    tabsen, word_attention_extractor = TabSen()
    tabsen.load_weights('./model/sentence/33-0.1379.hdf5')

    # NOTE: this local definition shadows the `int_to_label` imported above.
    def int_to_label(y_vectors, valid_class):
        """Map prediction vectors back to labels via a OneHotEncoder fitted on `valid_class`."""
        enc = OneHotEncoder(handle_unknown='ignore')
        enc.fit(valid_class.reshape(-1,1))
        labels = enc.inverse_transform(y_vectors)
        return labels

    # Names for the two reserved vocabulary ids.
    int_to_vocab[0] = 'pad'
    int_to_vocab[1] = 'UNK'

    import matplotlib
    import matplotlib.font_manager as fm
    # Return value is discarded.
    fm.get_fontconfig_fonts()
    font_location = 'font/malgun.ttf' # For Windows
    font_name = fm.FontProperties(fname=font_location).get_name()

    init_x_dict = {}
    init_x_dict['sequence'] = x_all_

    split = 'sequence'
    # Cut-off applied to the paragraph probabilities in output_prediction.
    threshold=0.5

    pred_attention = word_attention_extractor.predict(init_x_dict[split])
    pred=tabsen.predict([x_all_, x_row_, test_proba])
    labels = [i for i in np.concatenate(int_to_label(pred, valid_class))]

    if visualize=='on' :
        print('sentence visual file extracting...')
        plt.rcParams.update({'figure.max_open_warning': 0})
        words_list = []
        for sent_idx, sentence in enumerate(init_x_dict[split]):
            # Skip rows whose first token id is 0 (the 'pad' id).
            if sentence[0] == 0:
                continue

            # Find the first 0 id; the tokens before it are the real words.
            # NOTE(review): if no 0 occurs within max_len, `words`/`pred_att`
            # keep the values from a previous sentence (or are undefined on
            # the first one) -- confirm inputs are always padded.
            for word_idx in range(max_len):
                if sentence[word_idx] == 0:
                    words = [int_to_vocab[word_id] for word_id in sentence[0:word_idx]]
                    pred_att = pred_attention[sent_idx][0:len(words)]
                    pred_att = np.expand_dims(pred_att, axis=0)
                    break

            fig, ax = plt.subplots(figsize=(len(words), 1))
            plt.rc('font', family=font_name)
            plt.rc('xtick', labelsize=12)
            # NOTE(review): pred_att[:, 0] selects column 0 only; after the
            # expand_dims above that looks like a single value, making
            # midpoint 0 -- confirm intent.
            midpoint = (max(pred_att[:, 0]) - min(pred_att[:, 0])) / 2
            heatmap = sn.heatmap(pred_att, xticklabels=words, yticklabels=False, square=True, linewidths=0.1, cmap='coolwarm', center=midpoint, vmin=0, vmax=1)
            words_list.append([np.array(pred_att[0]), words, labels[sent_idx]])
            plt.xticks(rotation=45)
            plt.title(labels[sent_idx],)
            fig = plt.gcf()
            # File names are 1-based; no extension is given in the path.
            fig.savefig('./output/sentence/output_sen_vis/sentence_attention_{}_{}'.format(split,sent_idx+1), bbox_inches = "tight")
            plt.close(fig)

        # One Excel file per visualised sentence: tokens as columns, scores as the row.
        # NOTE: sen_idx counts visualised sentences, so it can differ from the
        # sent_idx used in the image names when rows were skipped above.
        scores = [i[0] for i in words_list]
        tokens = [i[1] for i in words_list]
        for sen_idx, (score,token) in enumerate(zip(scores, tokens)) :
            df = pd.DataFrame(score,token).T
            df.to_excel('./output/sentence/output_sen_dis/sentence_distribution_{}_{}.xlsx'.format(split, sen_idx+1), index=False)
        print('Check the /output/sentence/output_sen_dis, output_sen_vis', end='\n\n')
    elif visualize=='off':
        print('skip visualze', end='\n\n')
    else :
        print('visualize param check!')

    line_index = pd.read_excel('./data/index_line_label.xlsx', header=None)

    # Lookup from the second column of the sheet to the first column.
    line_dict = {}
    for i in line_index.values:
        line_dict[i[1]] = i[0]
    line_dict['-'] = '-'

    para_dict = num2label
    output_filename = './output/contract_tagging.xlsx'
    def output_prediction(infer_pred):
        """Attach line and paragraph labels to `origin_data` and write the Excel output.

        Returns the DataFrame that was written.
        """
        pred_label = [line_dict[i] for i in np.concatenate(int_to_label(infer_pred, valid_class))]

        # Indices of paragraph classes whose probability reaches the threshold.
        par_pred = [np.where(i>=threshold)[0] for i in test_prob]
        par_pred_label = [', '.join([para_dict[j] for j in i]) for i in par_pred]
        # Repeat each paragraph's label string once per element of its 'doc' entry.
        par_pred_length = [len(i) for i in ptl_df.doc.values]
        par_pred_label_ = np.concatenate([length*[pars] for length,pars in zip(par_pred_length, par_pred_label)])
        
        origin_data['line_label'] = pred_label
        
        origin_data['par_label'] = par_pred_label_

        # NOTE(review): iloc[:, :-1] drops the last column before the seven
        # Korean headers are assigned -- confirm which column is meant to go.
        output = origin_data.iloc[:,:-1]
        output.columns = ['문서번호','문단번호','조항번호','라인번호','내용','문단클래스','라인클래스']
        output.to_excel(output_filename, index=False)

        return output

    output = output_prediction(pred)
    print("Finish Contract Analysis")
    print("Check the Output file ", output_filename)
Example #19
0
    def portfolio_viz(self):
        """Display the portfolio summary card with its allocation pie chart.

        Steps, in order:
          1. Wire the home button to ``self.RED_start``.
          2. Show the risk-notice image for the first two entries of
             ``self.know_list`` and sector images for sectors 0, 5 and 7 of
             ``self.sector_list``.
          3. Pick the allocation ratios from the risk level in
             ``self.user_info[7]``; the bond-ETF share is forced to 0 for the
             first two entries of ``self.term_list``.
          4. Compute count-weighted profit and sigma over ``self.portfolios1``,
             ``self.portfolios2`` and ``self.portfolios3``.
          5. Save the pie chart to ``pie_chart.png``, draw the three summary
             texts onto ``red.png``, paste the chart and display the result.

        Raises:
            ValueError: if ``self.user_info[7]`` matches none of the first five
                entries of ``self.risk_list`` (previously this surfaced later
                as an ``UnboundLocalError``).
        """
        self.to_home_button.on_click(self.RED_start)

        if (self.user_info[6] == self.know_list[0]) or (self.user_info[6]
                                                        == self.know_list[1]):
            danger = Image.open(self.path +
                                "/red/interface/image/portfolio/위험고지.png")
            display(danger)

        # Show the correlation images for the sector of interest.
        if self.user_info[5] == self.sector_list[0]:
            s1 = Image.open(self.path +
                            "/red/interface/image/industry/건설양.png")
            s2 = Image.open(self.path +
                            "/red/interface/image/industry/건설음.png")
            display(s1)
            display(s2)
        elif self.user_info[5] == self.sector_list[5]:
            s3 = Image.open(self.path +
                            "/red/interface/image/industry/운수장비음.png")
            display(s3)
        elif self.user_info[5] == self.sector_list[7]:
            s4 = Image.open(self.path +
                            "/red/interface/image/industry/의약음.png")
            display(s4)

        # Portfolio ratios: r1 is the total ETF share, r2 the bond-ETF share;
        # the remaining (1 - r1) is the stock share. One pair per risk level,
        # in the order of self.risk_list.
        capital = self.user_info[0] * 10000
        risk_ratios = ((1, 0.67), (0.8, 0.4), (0.6, 0.3), (0.4, 0.1), (0.2, 0))
        for level, (r1, r2) in zip(self.risk_list, risk_ratios):
            if self.user_info[7] == level:
                break
        else:
            raise ValueError(
                "unknown risk level: {!r}".format(self.user_info[7]))

        if self.user_info[1] == self.term_list[0] or self.user_info[
                1] == self.term_list[1]:
            r2 = 0

        # round() rather than int(): (1 - 0.8) * 100 is 19.999999999999996 in
        # binary floating point, which int() would truncate to 19.
        real_r0 = round((1 - r1) * 100)
        real_r1 = round((r1 - r2) * 100)
        real_r2 = round(r2 * 100)

        # Count-weighted mean profit/sigma per sub-portfolio, blended by ratio.
        p_profit = 0
        p_sigma = 0
        weighted_portfolios = (
            (self.portfolios1, 1 - r1),
            (self.portfolios2, r2),
            (self.portfolios3, r1 - r2),
        )
        for equity, p_ratio in weighted_portfolios:
            cnt = 0
            profit = 0
            sigma = 0
            for info in equity.values():
                cnt += info[0]
                profit += info[1][-2] * info[0]
                sigma += info[1][-1] * info[0]
            if cnt > 0:
                profit /= cnt
                sigma /= cnt

            p_profit += profit * p_ratio
            p_sigma += sigma * p_ratio

        # The profit is compounded over 12 periods to get the yearly figure.
        annual_return = round(((1 + p_profit / 100)**12 - 1) * 100, 2)
        volatility = round(p_sigma * 100, 2)

        # Pie chart: two wedges without bond ETFs, three with them. The legend
        # is moved for the 20 % stock case (matched as the truncated value 19
        # before the rounding fix).
        if r2 == 0:
            ratio = [real_r0, real_r1]
            labels = ["주식", "일반 ETF"]
            colors = ["silver", "gold"]
            legend_loc = "lower left" if real_r0 == 20 else "upper left"
        else:
            ratio = [real_r0, real_r1, real_r2]
            labels = ["주식", "일반 ETF", "채권 ETF"]
            colors = ["silver", "gold", "lightgray"]
            legend_loc = "lower right" if real_r0 == 20 else "lower left"
        wedgeprops = {"width": 0.7, "edgecolor": "w", "linewidth": 5}

        fm.get_fontconfig_fonts()
        font_name = fm.FontProperties(fname=self.fontpath).get_name()
        matplotlib.rc("font", family=font_name)

        fig = plt.figure(figsize=(7, 7))
        plt.pie(
            ratio,
            labels=labels,
            startangle=90,
            autopct="%.0f%%",
            shadow=True,
            textprops={"fontsize": 20},
            colors=colors,
            wedgeprops=wedgeprops,
        )
        plt.legend(labels, fontsize=13, loc=legend_loc)
        plt.savefig(self.path +
                    "/red/interface/image/portfolio/pie_chart.png")
        plt.close(fig)

        # Load the card template and the chart that was just saved.
        im_tend = Image.open(self.path +
                             "/red/interface/image/portfolio/red.png")
        im_chart = Image.open(self.path +
                              "/red/interface/image/portfolio/pie_chart.png")
        font = ImageFont.truetype(self.fontpath, 22)

        # Text colour, passed unchanged as `fill`.
        text_fill = (0, 0, 0, 0)

        # Write the three summary lines onto the template.
        draw = ImageDraw.Draw(im_tend)
        summary_lines = (
            ((228, 80.5), "연 " + str(annual_return) + "% 내외 추구"),
            ((228, 244), "평균 위험률 연 " + str(volatility) + "%"),
            ((228, 405), "전체 주식 비중 " + str(real_r0) + "% 수준"),
        )
        for position, text in summary_lines:
            draw.text(position, text, font=font, fill=text_fill)

        # Paste the pie chart onto the template and show the result.
        im_tend.paste(im_chart, (510, 10))

        display(im_tend)
Example #20
0
# from xgboost.compat import XGBLabelEncoder
# => is IDENTICAL to `sklearn.preprocessing.LabelEncoder`

import lime
from lime import lime_tabular

import pdpbox
from pdpbox import pdp, info_plots


# Print the version of each core dependency, names left-aligned in a 7-wide column.
for _pkg in (np, pd, skl, xgb, mpl, pdpbox):
    print(f'{_pkg.__name__:<7} = {_pkg.__version__}')

# Map "file name without its last four characters" -> full path for every
# fontconfig font whose file name contains "dejavu" (case-insensitive).
font_dict = {
    file_name[:-4]: font_path
    for font_path, file_name in (
        (candidate, candidate.split('/')[-1])
        for candidate in fm.get_fontconfig_fonts()
    )
    if 'dejavu' in file_name.lower()
}

# Use the shortest matching key as the default font family.
plt.rcParams['font.family'] = sorted(font_dict, key=len)[0]
# os.chdir('../git/xgboost-lime-pdp')
fpath = '.'


# %% Classes & Functions -----------------------------------------------------

def as_int(string):
    """Return the first value of a comma-separated number string as ``np.int64``."""
    parsed = np.fromstring(string, dtype=np.int64, sep=',')
    return parsed[0]


def as_int_str(string):
Example #21
0
def draw222(values, variables, notch=False, title="boxplot_result"):
    """Draw two grouped box-plot figures and save them as JPEG files.

    Each figure shows four boxes: a pair around x=0 and a pair around x=2.
    The first figure uses ``values[0..3]``, the second ``values[4..7]``.
    Figures are saved as ``./<title>1.jpg`` and ``./<title>2.jpg`` (dpi 400)
    and shown with ``plt.show()``.

    Args:
        values: Indexable with eight datasets, one per box.
        variables: Dict with the keys read below:
            ``factor1`` (name: x-axis label, value: tick labels),
            ``factor2`` (name: title prefix, value: per-figure titles),
            ``mask`` (value: legend entries) and
            ``numeric`` (name: y-axis label).
        notch: Passed through to ``plt.boxplot``.
        title: File-name stem of the saved figures.
    """
    label_title = variables["factor1"]["name"]
    labels = variables["factor1"]["value"]

    top_title = variables["factor2"]["name"]
    titles = variables["factor2"]["value"]

    color_names = variables["mask"]["value"]

    value_name = variables["numeric"]["name"]

    # Box positions and colours, left pair first, then right pair.
    positions = [-0.4, 0.4, 1.6, 2.4]
    box_colors = ['pink', 'lightgreen', 'pink', 'lightgreen']
    ticks = [0, 2]

    # Pick a Korean-capable font by platform.
    if platform == "linux" or platform == "linux2":
        fontprop = fm.FontProperties("NanumGothic")
    elif platform == "darwin":
        fontprop = fm.FontProperties("AppleGothic")
    elif platform == "win32":
        fontprop = fm.FontProperties("Malgun Gothic")
    else:
        print(
            "User platform could not be identified. Korean characters may not be shown correctly when visualizing."
        )
        # Fall back to the default font so plotting still works.
        fontprop = fm.FontProperties()

    def render(datasets, subtitle, suffix, legend_start):
        """Draw one four-box figure, save it with `suffix` and show it."""
        fig = plt.figure(figsize=(10, 8))
        fig.suptitle(top_title + "(" + subtitle + ")",
                     fontsize=35,
                     fontproperties=fontprop)

        bplots = [
            plt.boxplot(dataset,
                        widths=0.35,
                        positions=[position],
                        notch=notch,
                        patch_artist=True)
            for dataset, position in zip(datasets, positions)
        ]
        plt.xticks(ticks, labels, fontsize=15, fontproperties=fontprop)

        # Colour the first box of every boxplot call.
        for bplot, color in zip(bplots, box_colors):
            for patch in bplot['boxes'][:1]:
                patch.set_facecolor(color)

        plt.grid(True)
        plt.xlabel(label_title, fontsize=20, fontproperties=fontprop)
        plt.ylabel(value_name, fontsize=20, fontproperties=fontprop)
        plt.legend([
            bplots[legend_start]["boxes"][0],
            bplots[legend_start + 1]["boxes"][0]
        ],
                   color_names,
                   loc='upper right',
                   fontsize=15,
                   prop=fontprop)
        # savefig takes no font argument; fonts are set on the artists above.
        plt.savefig("./" + title + suffix + ".jpg", dpi=400)
        plt.show()

    # First figure: legend handles come from the left pair.
    render([values[0], values[1], values[2], values[3]], titles[0], "1", 0)
    # Second figure: legend handles come from the right pair.
    render([values[4], values[5], values[6], values[7]], titles[1], "2", 2)
Example #22
0
def test_get_fontconfig_fonts():
    """Check that more than one fontconfig font is reported, except on win32."""
    # The font count is only evaluated when the platform is not win32.
    if sys.platform != 'win32':
        assert len(get_fontconfig_fonts()) > 1
Example #23
0
def word2vec_test(file_list, w2v_name) :
    """Query a saved Word2Vec model and plot a t-SNE map of frequent words.

    Loads the model ``./data/<w2v_name>``, prints a few vector and similarity
    queries for four POS-tagged probe words, collects the 100 most common
    words from the first tab-separated column of the given files, reduces
    their vectors to 2-D with t-SNE and saves the annotated scatter plot to
    ``./data/3_word2vec_tsne.png``.

    All vector look-ups go through ``model.wv``; indexing the model object
    directly is the deprecated form that this block already avoided for
    ``doesnt_match``/``most_similar``.

    Args:
        file_list: File names, relative to ``./data/``, read as UTF-8.
        w2v_name: File name of the saved Word2Vec model inside ``./data/``.
    """
    source_dir ='./data/'
    fig_file = '3_word2vec_tsne.png'
    font_name = '/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf'

    # Load the word2vec model.
    model = models.Word2Vec.load(source_dir + w2v_name)

    # POS-tag the probe words and keep them in a list.
    data1 = pre.konlpy_pos_tag('배우')
    data2 = pre.konlpy_pos_tag('엄마')
    data3 = pre.konlpy_pos_tag('여자')
    data4 = pre.konlpy_pos_tag('남자')
    data_list = [data1, data2, data3, data4]

    # Run the queries and print the results.
    print(model.wv[data1])
    print(model.wv.doesnt_match(data_list))
    print(model.wv.most_similar(positive=[data1], topn=10))
    print(model.wv.most_similar(positive=[data2, data4], negative=[data3], topn=1))
    print(model.wv.similarity(data1, data2))
    print(model.wv.similarity(data1, data3))

    # Collect the whitespace-separated words of the first column of every line.
    total_word_list = []
    for file_name in file_list:
        with open(source_dir + file_name, 'r', encoding='UTF-8') as f:
            for line in f.read().splitlines():
                total_word_list += line.split('\t')[0].split()

    # Keep the 100 most frequent words.
    word_list = [word for word, _count in Counter(total_word_list).most_common(100)]
    print(word_list)

    # Print the available fonts whose path contains 'nanum'.
    font_list = font_manager.get_fontconfig_fonts()
    print([font for font in font_list if 'nanum' in font])

    # Select the font for plotting.
    rc('font', family=font_manager.FontProperties(fname=font_name).get_name())

    # Vectors of the selected words.
    # NOTE(review): assumes every word is in the model vocabulary.
    vector_list = model.wv[word_list]

    # Reduce to two dimensions.
    transformed = TSNE(n_components=2).fit_transform(vector_list)
    print(transformed)

    # Split the 2-D points into x and y coordinates.
    x_plot = transformed[:, 0]
    y_plot = transformed[:, 1]

    # Figure size.
    pyplot.figure(figsize=(10, 10))

    # Draw the points and label each with its word.
    pyplot.scatter(x_plot, y_plot)
    for i, word in enumerate(word_list):
        pyplot.annotate(word, xy=(x_plot[i], y_plot[i]))

    # Save the image.
    pyplot.savefig(source_dir + fig_file)
Example #24
0
import re
from matplotlib import rcParams, pyplot as pp
from cycler import cycler

# Fixed (low, high) tuples; by their names presumably y-axis limits for the
# polar and gain plots -- TODO confirm units against the plotting code.
POLAR_YLIM_CONST = (-18, -6)
POLAR_YLIM_CONST_MEAS = (-22, -10)
# NOTE: this second assignment overrides the value on the previous line.
POLAR_YLIM_CONST_MEAS = (-15, -3)
POLAR_YLIM_CONST_ALT = (-32, -6)
GAIN_FIXED_YLIM1 = (-20, -0)
GAIN_FIXED_YLIM1_REVISED = (-12, -2)
GAIN_FIXED_YLIM2 = (-30, -0)
GAIN_FIXED_YLIM3 = (-10, 10)
GAIN_FIXED_YLIM4 = (-0, 20)
GAIN_FIXED_YLIM5 = (-20, 0)

# Font file paths reported by fontconfig.
fcFontList = FM.get_fontconfig_fonts()
# Only fonts whose path matches one of these names are considered.
# The order doubles as a priority list.
fontsDesired = ['Times', 'Helvetica', 'Arial']
fontsDesiredRe = re.compile('|'.join(fontsDesired), flags=re.IGNORECASE)
# Unique family names of the matching font files.
fontsAvailable = frozenset(
    FM.FontProperties(fname=fontPath).get_name()
    for fontPath in fcFontList
    if fontsDesiredRe.search(fontPath) is not None
)

# Filled in by the priority search that follows.
fontSelected = None
for fontSearch in fontsDesired:
    for fontFound in fontsAvailable:
        if re.search(fontSearch, fontFound, flags=re.IGNORECASE) != None:
            fontSelected = fontFound
            break
    if fontSelected != None:
Example #25
0
def use_kor_font():
    """Make matplotlib render text with the NanumBarunGothic Bold font.

    The family name is read from the font file at a fixed ``/Library/Fonts``
    path and installed as the default ``font.family`` via ``plt.rc``.
    """
    font_location = '/Library/Fonts/NanumBarunGothicBold.ttf'
    # Ask the font file for its own family name so the rc setting uses
    # exactly the name the font reports.
    font_name = fm.FontProperties(fname=font_location).get_name()
    plt.rc('font', family=font_name)
Example #26
0
    def make_graph(self):
        """Draw the cause and result term graphs and save each as a PNG.

        Reads ``data/result/<file_name>_final_w2v_result.csv``, keeps the rows
        whose ``value`` is positive and splits them by ``type`` (``'C'`` for
        causes, ``'R'`` for results).  Every row becomes a directed edge from
        its ``seed_term`` to its ``terms`` entry.  The graphs are written to
        ``data/result/<file_name>_cause.png`` and
        ``data/result/<file_name>_result.png``, each on its own figure.

        Intermediate frames and lists are printed to stdout.
        """
        w2v_data = pd.read_csv('data/result/' + self.file_name +
                               '_final_w2v_result.csv',
                               encoding='utf-8-sig')
        is_positive = w2v_data['value'] > 0.0
        cause_df = w2v_data[(w2v_data['type'] == 'C') & is_positive]
        result_df = w2v_data[(w2v_data['type'] == 'R') & is_positive]
        print(cause_df)

        # Min-max scale the cause weights to the range 0..2000.  An empty
        # selection yields an empty list, and identical weights map to 0.0
        # instead of dividing by a zero range.
        raw_weights = np.array(cause_df['value'].to_list(), dtype=float)
        if raw_weights.size == 0:
            cause_list_weight = []
        else:
            lowest = raw_weights.min()
            spread = raw_weights.max() - lowest
            if spread > 0:
                scaled = 2000 * (raw_weights - lowest) / spread
            else:
                scaled = np.zeros_like(raw_weights)
            cause_list_weight = scaled.tolist()
        print(cause_list_weight)

        # The 'weight' column is carried in the frames but is not passed to
        # networkx as an edge attribute below.
        df_cause = pd.DataFrame({
            'from': cause_df['seed_term'].to_list(),
            'to': cause_df['terms'].to_list(),
            'weight': cause_list_weight,
        })
        df_result = pd.DataFrame({
            'from': result_df['seed_term'].to_list(),
            'to': result_df['terms'].to_list(),
            'weight': result_df['value'].to_list(),
        })
        print(df_cause)

        # Linux alternative:
        # font_location = '/usr/share/fonts/truetype/nanum/NanumGothicOTF.ttf'
        font_location = 'C:/Windows/Fonts/NanumGothic.ttf'  # For Windows
        font_name = fm.FontProperties(fname=font_location).get_name()
        plt.rc('font', family=font_name)

        G_cause = nx.from_pandas_edgelist(df_cause,
                                          'from',
                                          'to',
                                          create_using=nx.DiGraph())
        G_result = nx.from_pandas_edgelist(df_result,
                                           'from',
                                           'to',
                                           create_using=nx.DiGraph())
        pos_cause = nx.spring_layout(G_cause)
        pos_result = nx.spring_layout(G_result)

        # --- cause graph ---
        fig_cause = plt.figure(figsize=(16, 12))
        plt.title('원인')
        print(G_cause.nodes)
        # 'black' replaces the former 'dark', which is not a colour name
        # matplotlib accepts.
        nx.draw_networkx(G_cause,
                         pos=pos_cause,
                         node_size=1000,
                         node_color='black',
                         alpha=.1,
                         font_family=font_name,
                         with_labels=True)
        plt.savefig('data/result/' + self.file_name + '_cause.png',
                    bbox_inches='tight')
        plt.close(fig_cause)

        # --- result graph ---
        # A fresh figure keeps the cause graph out of the result image.
        fig_result = plt.figure(figsize=(16, 12))
        nx.draw(G_result,
                pos=pos_result,
                font_family=font_name,
                with_labels=True)
        plt.savefig('data/result/' + self.file_name + '_result.png',
                    bbox_inches='tight')
        plt.close(fig_result)
def test_get_fontconfig_fonts():
    """The fontconfig lookup must report more than one font."""
    found_fonts = get_fontconfig_fonts()
    assert len(found_fonts) > 1
Example #28
0
def plot_heatmap(res, print_std=False, title=None):
    """Plot the heatmap derived from *res* with strike-zone outlines on top.

    Args:
        res: Pitch records handed unchanged to ``get_heatmap``.  The original
            author's note lists the fields as pX, pZ, result, stop, sbot,
            inn, bc, stuff, speed.
        print_std: Forwarded to ``get_heatmap``.  When it is exactly ``True``
            the vertical range is -1.5..1.5 and the zone is drawn from -1 to
            +1; otherwise the range is 1.0..4.0 with the zone from 1.579 to
            3.325.
        title: Plot title; ``None`` selects '2017 KBO S-Zone heatmap'.

    The figure is created and drawn but neither shown nor saved here.
    """
    import os

    from matplotlib import rc

    # get_heatmap returns two values; only the first one is plotted.
    P, _ = get_heatmap(res, print_std=print_std)

    # Plot extent: left/right and bottom/top borders.
    lb, rb = -1.5, +1.5
    if print_std is True:
        bb, tb = -1.5, +1.5
    else:
        bb, tb = +1.0, +4.0

    # Grid with a step of 1/12 along both axes.
    x = np.arange(lb, rb, 1 / 12)
    y = np.arange(bb, tb, 1 / 12)
    X, Y = np.meshgrid(x, y)

    plt.figure(figsize=(6, 5), dpi=80, facecolor='white')

    # Font selection: read the family name from the font file on POSIX,
    # otherwise rely on a font registered under the name 'NanumSquare'.
    if os.name == 'posix':
        import matplotlib.font_manager as fm
        font_location = '/Library/Fonts/NanumSquareOTFRegular.otf'
        font_name = fm.FontProperties(fname=font_location).get_name()
        rc('font', family=font_name)
    else:
        rc('font', family='NanumSquare')

    plt.rcParams['axes.unicode_minus'] = False
    plt.pcolormesh(X, Y, P)
    plt.colorbar(format=ticker.FuncFormatter(fmt))

    # Inner and outer zone rectangles.
    ll, rl = -17 / 24, +17 / 24
    oll, orl = -20 / 24, +20 / 24
    if print_std is True:
        bl, tl = -1.0, +1.0
    else:
        bl, tl = 1.579, 3.325
    obl = bl - 3 / 24
    otl = tl + 3 / 24

    for left, right, bottom, top in ((ll, rl, bl, tl), (oll, orl, obl, otl)):
        # Left edge, right edge, bottom edge, top edge.
        edges = (
            ([left, left], [bottom, top]),
            ([right, right], [bottom, top]),
            ([left, right], [bottom, bottom]),
            ([left, right], [top, top]),
        )
        for xs, ys in edges:
            plt.plot(xs, ys, color='#ffffff', linestyle='-', lw=1)

    plt.title('2017 KBO S-Zone heatmap' if title is None else title)

    # Trim one grid step from every side of the visible area.
    plt.axis([lb + 1 / 12, rb - 1 / 12, bb + 1 / 12, tb - 1 / 12])
Example #29
0
def test_get_fontconfig_fonts():
    """Expect fontconfig to list more than a single font."""
    font_paths = get_fontconfig_fonts()
    assert len(font_paths) > 1
Example #30
0
def _use_nanum_square_font():
    """Select the NanumSquare font for matplotlib text drawn afterwards."""
    import os

    from matplotlib import rc

    if os.name == 'posix':
        import matplotlib.font_manager as fm
        font_location = '/Library/Fonts/NanumSquareOTFRegular.otf'
        font_name = fm.FontProperties(fname=font_location).get_name()
        rc('font', family=font_name)
    else:
        rc('font', family='NanumSquare')


def _draw_zone_panel(fig, position, facecolor, caption, layers):
    """Add one subplot: pitch locations over a 3x3 strike-zone grid.

    Args:
        fig: Figure that receives the subplot.
        position: Three-digit subplot code passed to ``fig.add_subplot``.
        facecolor: Background colour of the subplot.
        caption: Text written centred at x=0, y=3.8.
        layers: Iterable of ``(rows, color, label)``; column 0 of *rows* is
            plotted as x and column 1 as y, in the given order.
    """
    # Visible area.
    lb, rb = -1.5, +1.5  # left / right border
    bb, tb = +1.0, +4.0  # bottom / top border
    # Zone rectangle.
    ll, rl = -17 / 24, +17 / 24  # left / right line
    bl, tl = +1.579, +3.325  # bottom / top line
    # Outer rectangle, 1/8 beyond the zone on every side.
    oll, orl = -17 / 24 - 1 / 8, +17 / 24 + 1 / 8
    obl, otl = +1.579 - 1 / 8, +3.325 + 1 / 8

    ax = fig.add_subplot(position, facecolor=facecolor)
    ax.tick_params(axis='x', colors='white')

    for rows, color, label in layers:
        if len(rows) == 0:
            continue
        # One scatter call per group instead of one per pitch.
        ax.scatter(rows[:, 0].astype(float),
                   rows[:, 1].astype(float),
                   color=color,
                   alpha=.5,
                   s=np.pi * 50,
                   label=label)

    # Zone grid: four vertical and four horizontal lines split it in thirds.
    for gx in (ll, ll + (rl - ll) / 3, ll + (rl - ll) * 2 / 3, rl):
        ax.plot([gx, gx], [bl, tl], color='#f9f9ff', linestyle='-', lw=1)
    for gy in (bl, bl + (tl - bl) / 3, bl + (tl - bl) * 2 / 3, tl):
        ax.plot([ll, rl], [gy, gy], color='#f9f9ff', linestyle='-', lw=1)

    # Thinner outer rectangle.
    for gx in (oll, orl):
        ax.plot([gx, gx], [obl, otl], color='#d0cfd3', linestyle='-', lw=0.5)
    for gy in (obl, otl):
        ax.plot([oll, orl], [gy, gy], color='#d0cfd3', linestyle='-', lw=0.5)

    ax.axis([lb, rb, bb, tb])
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.axis('off')
    ax.autoscale_view(tight=True)
    # Numeric coordinates: the caption sits at x=0, y=3.8 in data units.
    ax.text(0,
            3.8,
            caption,
            color='white',
            fontsize=14,
            horizontalalignment='center',
            weight='bold')


def plot_match_calls(values, title=None):
    """Show six strike-zone panels of pitch locations grouped by result.

    Args:
        values: 2-D array with one row per pitch.  Column 0 is plotted as x,
            column 1 as y and column 2 holds the result code.  The original
            author's note lists the fields as pX, pZ, result, st, sb, inn,
            bc, stuff, speed, and the result codes as 1 ball, 2 strike,
            3 whiff, 4 foul, 5 in play, 6 bunt foul, 7 bunt whiff.
        title: Optional figure title, drawn bold and white.

    Panels, in subplot order: strikes+balls, strikes, balls, whiffs
    (codes 3 and 7), fouls (codes 4 and 6), in play.  Ends with
    ``plt.show()``.
    """
    _use_nanum_square_font()
    plt.rcParams['axes.unicode_minus'] = False

    result = values[:, 2]
    bvalues = values[result == 1]
    svalues = values[result == 2]
    # Element-wise OR of the masks, so that both codes of each pair are
    # selected (a Python `or` between np.where results keeps only the first).
    wvalues = values[(result == 3) | (result == 7)]
    fvalues = values[(result == 4) | (result == 6)]
    ivalues = values[result == 5]

    fig = plt.figure(figsize=(12, 7), dpi=160, facecolor='#898f99')

    if title is not None:
        st = fig.suptitle(title, fontsize=20)
        st.set_color('white')
        st.set_weight('bold')
        st.set_horizontalalignment('center')

    strikes = (svalues, '#ef2926', '스트라이크')
    balls = (bvalues, '#3245ef', '볼')
    whiffs = (wvalues, '#1a1b1c', '헛스윙')
    fouls = (fvalues, '#edf72c', '파울')
    inplays = (ivalues, '#8348d1', '인플레이')

    # (subplot code, background, caption, groups drawn in this order)
    panels = (
        (231, '#313133', '스트라이크+볼', (strikes, balls)),
        (232, '#313133', '스트라이크', (strikes,)),
        (233, '#313133', '볼', (balls,)),
        (234, '#313133', '헛스윙', (whiffs,)),
        (235, '#313133', '파울', (fouls,)),
        (236, '#d19c49', '인플레이', (inplays,)),
    )
    for position, facecolor, caption, layers in panels:
        _draw_zone_panel(fig, position, facecolor, caption, layers)

    plt.show()
Example #31
0
# NOTE(review): `num_1` and `np` are defined above the visible part of this
# script; the reshape(6, 5) below implies num_1 holds 30 elements.

# Print num_1 in reverse order.
print(num_1[::-1])

# Basic statistics: sum, mean and median.
print(num_1.sum())
print(num_1.mean())
print(np.median(num_1))

# num_2 is num_1 shifted by 5; print the correlation matrix of the pair.
num_2 = num_1 + 5
result = np.corrcoef([num_1, num_2])
print(result)

# Rearrange num_1 into 6 rows of 5 columns.
num_1= num_1.reshape(6, 5)
print(num_1)

import matplotlib
import matplotlib.font_manager as fm
# The return value (font paths known to fontconfig) is not used.
fm.get_fontconfig_fonts()



import matplotlib.pyplot as plt

# Months 7..12 on the x axis and one user count per month on the y axis.
x = [x for x in range(7,13)]
y = [456,492,578,599,670,854]

plt.plot(x,y,marker = 'o',color = 'orange')
plt.xlabel('month')
plt.ylabel('user')
# NOTE(review): the title is Korean, but no font is configured in the visible
# code -- confirm the default font can render it.
plt.title('신규사용자')
plt.show()

Example #32
0
#!/usr/bin/env python3
#
# Filename: get_fontNames.py
# Author: Zhiguo Wang
# Date: 2/7/2020
#
# Description:
# Retrieve the names of all available system fonts
# Run this script from the command line

import sys

from matplotlib import font_manager

# Paths of the font files reported by fontconfig.
f_list = font_manager.get_fontconfig_fonts()

# Family name of every font file that could be read.
f_names = []
for font in f_list:
    try:
        f = font_manager.FontProperties(fname=font).get_name()
    except Exception as err:
        # One unreadable font file must not abort the listing, but it is
        # reported on stderr instead of being hidden.  Ctrl-C and SystemExit
        # are no longer swallowed.
        print('skipped', font, '-', err, file=sys.stderr)
    else:
        f_names.append(f)

print(f_names)