def wordcloud_generation(skill_text, overall_skills, project_skills, unique_id,
                         name):
    """Draw a word cloud of ``skill_text`` and save it as a PNG under ``static/``.

    The cloud is generated from the stripped text, then every word is
    recoloured with ``find_color(word, overall_skills, project_skills)``
    before the image is rendered.

    Args:
        skill_text: text the cloud is generated from.
        overall_skills: forwarded unchanged to ``find_color``.
        project_skills: forwarded unchanged to ``find_color``.
        unique_id: string inserted in the output file name.
        name: string inserted in the output file name.

    Returns:
        None.  The image is written to
        ``'static/here' + unique_id + name + '.png'`` at 300 dpi.
    """
    wc = WordCloud(stopwords=None, max_words=200, max_font_size=100)
    wc.generate(skill_text.strip())

    # Keep the computed geometry of every word, replace only its colour.
    wc.layout_ = [
        ((word, freq), fontsize, position, orientation,
         find_color(word, overall_skills, project_skills))
        for (word, freq), fontsize, position, orientation, _ in wc.layout_
    ]

    fig = plt.figure(figsize=(10, 10))
    try:
        plt.imshow(wc, interpolation='bilinear')
        plt.axis("off")
        plt.title(
            "Skills - (Skills in Project vs Overall Skills mentioned in Resume)\nRed - Skills in project, White - Overall Skills"
        )
        plt.tight_layout()
        plt.savefig('static/' + 'here' + unique_id + name + '.png', dpi=300)
    finally:
        # pyplot keeps a reference to every figure until it is closed;
        # without this each call leaves a large figure alive.
        plt.close(fig)
Esempio n. 2
0
def make_chronocloud(words_carac, n, name, langage):
    """Render a circular "chronocloud" image and dump its words and layout.

    The ``n`` x ``n`` canvas is split into a central disc and four concentric
    rings, each ring being cut into 16 angular sectors (one per decade from
    1840 to 1990).  A separate word cloud is laid out inside every region
    that has words, and all layouts are merged into a single image that is
    overlaid on the date legend produced by ``generate_date_circle``.

    Args:
        words_carac: mapping ``word -> record``.  The code reads
            ``record[0]`` (compared with the decade bounds), ``record[1]``
            (used as the word frequency), ``record[2]`` (compared with the
            resilience thresholds) and ``record[3]`` (handed to
            ``color_func`` through ``functools.partial``).
        n: side length of the square canvas, in pixels.
        name: prefix of the output files.
        langage: ``'Hebrew'`` and ``'Chinese_simplified'`` select a dedicated
            font file; any other value selects ``'NotoSans-Regular.ttf'``.

    Side effects:
        Writes ``<name>_chronodata_words_alt.txt``,
        ``<name>_chronodata_layout_alt.txt`` and ``<name>_chronocloud.png``,
        and prints progress messages.

    Raises:
        ValueError: if a region that has words contains no free pixel
            (possible for very small ``n``).
    """
    data = np.zeros((n, n, 3), dtype=np.uint8)
    dates = [str(year) for year in range(1840, 1991, 10)]
    dates[0] = '2000 | ' + dates[0]
    angles = [
        0, 337.5, 315.0, 292.5, 270.0, 247.5, 225.0, 202.5, 180.0, 157.5,
        135.0, 112.5, 90.0, 67.5, 45.0, 22.5
    ]
    data = generate_date_circle(data, dates, angles, 0.95)
    data = 255 - data
    print('chronocloud legend: done')

    fonts = {
        'Hebrew': 'NotoSansHebrew-Regular.ttf',
        'Chinese_simplified': 'NotoSansCJKtc-Regular.otf',
    }
    the_font = fonts.get(langage, 'NotoSans-Regular.ttf')

    # Settings shared by every per-region word cloud.
    cloud_params = {
        'font_path': the_font,
        'prefer_horizontal': 0.5,
        'background_color': 'white',
        'max_words': 5000,
        'relative_scaling': 0.5,
        'max_font_size': 0.03 * n,
    }

    a, b = n / 2, n / 2
    y, x = np.ogrid[0:n, 0:n]
    # Squared distance of every pixel to the canvas centre, computed once.
    dist_sq = (x - a) * (x - a) + (y - b) * (y - b)

    var_1, var_2 = [], []

    def place_words(blocked, the_words, the_colors):
        """Lay out ``the_words`` in the pixels where ``blocked`` is False."""
        the_mask = np.zeros((n, n), dtype=np.uint8)
        the_mask[blocked] = 255

        # Paint only inside the bounding rectangle of the free area, because
        # it is faster; positions are shifted back to canvas coordinates
        # afterwards.
        free = np.argwhere(the_mask == 0)
        x_min, y_min = free.min(axis=0)
        x_max, y_max = free.max(axis=0)
        the_real_mask = the_mask[x_min:(x_max + 1), y_min:(y_max + 1)]

        wc = WordCloud(mask=the_real_mask,
                       color_func=partial(color_func, the_colors),
                       stopwords=[],
                       **cloud_params)
        wc.generate_from_frequencies(the_words)

        var_1.extend(wc.words_)
        for entry, fontsize, position, orientation, color in wc.layout_:
            var_2.append((entry, fontsize,
                          (position[0] + x_min, position[1] + y_min),
                          orientation, color))

    # Centre disc: every word whose resilience is at least 150.
    resilience = 150
    r_1 = 0.45 * (7 - (resilience / 25)) * (n / 5)
    the_words = {}
    the_colors = {}
    for word in words_carac:
        carac = words_carac[word]
        if carac[2] >= resilience:
            the_words[word] = carac[1]
            try:
                the_colors[word] = carac[3]
            except (IndexError, KeyError):
                # Record without a colour entry: report it and carry on.
                print(carac)
    if the_words:
        place_words(dist_sq > r_1 * r_1, the_words, the_colors)
    print('chronocloud noyau: done')

    # Sector edges.  Each sector is bounded by two half-planes; a pixel is
    # masked out when Z1 * (x - Z2 * a) + Z3 * (y - Z4 * b) > 0 for either
    # [Z1, Z2, Z3, Z4] row.  ``t`` slightly shifts the axis-aligned edges.
    t = 0.002
    p1 = 2.41421356237
    p2 = 0.41421356237
    c_1 = [[-1, 1 + t, 0, 1], [p1, 1, 1, 1]]
    c_2 = [[-p1, 1, -1, 1], [1, 1, 1, 1]]
    c_3 = [[-1, 1, -1, 1], [p2, 1, 1, 1]]
    c_4 = [[-p2, 1, -1, 1], [0, 1, 1, 1 - t]]
    c_5 = [[0, 1, -1, 1 + t], [-p2, 1, 1, 1]]
    c_6 = [[p2, 1, -1, 1], [-1, 1, 1, 1]]
    c_7 = [[1, 1, -1, 1], [-p1, 1, 1, 1]]
    c_8 = [[p1, 1, -1, 1], [-1, 1 + t, 0, 1]]
    c_9 = [[1, 1 - t, 0, 1], [-p1, 1, -1, 1]]
    c_10 = [[p1, 1, 1, 1], [-1, 1, -1, 1]]
    c_11 = [[1, 1, 1, 1], [-p2, 1, -1, 1]]
    c_12 = [[p2, 1, 1, 1], [0, 1, -1, 1 + t]]
    c_13 = [[0, 1, 1, 1 - t], [p2, 1, -1, 1]]
    c_14 = [[-p2, 1, 1, 1], [1, 1, -1, 1]]
    c_15 = [[-1, 1, 1, 1], [p1, 1, -1, 1]]
    c_16 = [[-p1, 1, 1, 1], [1, 1 - t, 0, 1]]
    arretes = [
        c_1, c_2, c_3, c_4, c_5, c_6, c_7, c_8, c_9, c_10, c_11, c_12, c_13,
        c_14, c_15, c_16
    ]

    # Go through each sector (resilience band, decade).
    for resilience in [125, 100, 75, 50]:
        # The ring depends only on the resilience band, not on the decade.
        r_1 = 0.45 * (7 - (resilience / 25)) * (n / 5)
        r_2 = 0.45 * (6 - (resilience / 25)) * (n / 5)
        outside_ring = (dist_sq > r_1 * r_1) | (dist_sq < r_2 * r_2)

        for indice, years_lim in enumerate(range(1840, 1991, 10)):
            the_words = {}
            the_colors = {}
            for word in words_carac:
                carac = words_carac[word]
                if (resilience <= carac[2] < resilience + 25
                        and years_lim <= carac[0] < years_lim + 10):
                    the_words[word] = carac[1]
                    the_colors[word] = carac[3]
            if not the_words:
                # Nothing to draw here: skip building the mask entirely.
                continue

            (z1, z2, z3, z4), (w1, w2, w3, w4) = arretes[indice]
            blocked = (outside_ring
                       | (z1 * (x - z2 * a) + z3 * (y - z4 * b) > 0)
                       | (w1 * (x - w2 * a) + w3 * (y - w4 * b) > 0))
            place_words(blocked, the_words, the_colors)
        print('chronocloud R=' + str(resilience) + ': done')

    wc_montre = WordCloud(font_path=the_font,
                          background_color='white',
                          width=n,
                          height=n)
    wc_montre.words_ = var_1
    wc_montre.layout_ = var_2

    with open(name + '_chronodata_words_alt.txt', 'w') as fichier:
        fichier.writelines(str(item) + '\n' for item in var_1)
    with open(name + '_chronodata_layout_alt.txt', 'w') as fichier:
        fichier.writelines(str(item) + '\n' for item in var_2)

    # Overlay legend and cloud as a saturating sum of their inverted images.
    # A plain ``data_1 + data_2`` wraps around on uint8 arrays, which made
    # the former ``> 255`` clamp ineffective; limiting the second term to
    # the remaining headroom keeps the sum within 0..255 in any dtype.
    data_1 = 255 - data
    data_2 = 255 - wc_montre.to_array()
    data = data_1 + np.minimum(data_2, 255 - data_1)
    data = 255 - data
    image_from_np_2(data).save(name + '_chronocloud.png')
Esempio n. 3
0
def voxel(v, name):
    """Render the ``cloud.html`` page for one voxel.

    The row ``int(v)`` of ``app.df`` is read, its non-zero entries are turned
    into a word cloud (each concept key repeated ``int(abs(value) * 1000)``
    times), and the cloud is embedded in the template as base64 PNG data.

    Args:
        v: voxel index; converted with ``int``.
        name: passed to the template as ``region_name``.

    Returns:
        The result of ``render_template("cloud.html", ...)``.

    NOTE(review): ``cStringIO`` and ``str.encode("base64")`` exist only on
    Python 2 - confirm the target interpreter before porting.
    """
    voxel_idx = int(v)

    # Prepare variables
    regparams = app.df.loc[voxel_idx]

    # Generate a lookup by concept name
    lookup = get_lookup()

    # We are only interested in nonzero concepts
    regparams = pandas.DataFrame(regparams[regparams != 0])
    concepts = regparams.index.tolist()
    colors = random_colors(concepts)

    regparams["key"] = [lookup[x] for x in regparams.index]
    regparams["color"] = [colors[x] for x in regparams.index]
    regparams.columns = ['value', 'key', 'color']

    # Generate a word cloud image, take regression params into account:
    # every key is repeated proportionally to the magnitude of its value.
    # Spaces become "_" so that multi-word keys stay a single token.
    scaled = (regparams['value'].abs() * 1000).copy()
    text = []
    for concept, weight in scaled.iteritems():
        multiply_by = int(weight)
        token = regparams.loc[concept]['key'].replace(" ", "_")
        text.extend([token] * multiply_by)

    text = " ".join(text)
    regparams = regparams.to_json(orient="records")

    # Min and max values for the color scale
    min_voxel = app.X.loc[:, voxel_idx].min()
    max_voxel = app.X.loc[:, voxel_idx].max()

    # We will let the user select a voxel location based on region
    regions = app.regions.to_dict(orient="records")

    wordcloud = WordCloud(max_font_size=100,
                          width=app.width,
                          height=app.height,
                          relative_scaling=1.0,
                          background_color="white").generate(text)

    # Remove "_" in words
    wordcloud.words_ = [(tup[0].replace("_", " "), tup[1])
                        for tup in wordcloud.words_]
    wordcloud.layout_ = [((tup[0][0].replace("_", " "), tup[0][1]), tup[1],
                          tup[2], tup[3], tup[4])
                         for tup in wordcloud.layout_]

    # Draw on a fresh figure and close it afterwards: drawing on pyplot's
    # implicit current figure would stack a new image on the same axes at
    # every request and never release it.
    fig = plt.figure()
    try:
        plt.imshow(wordcloud)
        plt.axis("off")
        sio = cStringIO.StringIO()
        plt.savefig(sio, format="png")
    finally:
        plt.close(fig)
    png_data = sio.getvalue().encode("base64").strip()

    return render_template("cloud.html",
                           regparams=regparams,
                           min=app.df.loc[voxel_idx].min(),
                           max=app.df.loc[voxel_idx].max(),
                           width=app.width,
                           min_voxel=min_voxel,
                           max_voxel=max_voxel,
                           height=app.height,
                           padding=app.padding,
                           radius=app.radius,
                           maxRadius=app.maxRadius,
                           lookup=lookup,
                           colors=colors,
                           png_data=png_data,
                           voxel=voxel_idx,
                           regions=regions,
                           region_name=name)
Esempio n. 4
0
def make_chronocloud(words_carac, n, resiliences, periods, name):
    """Render a chronocloud image and write one text file per section.

    The ``n`` x ``n`` canvas is split into a central disc and four
    concentric rings, each ring being cut into angular sectors (one per
    consecutive pair of ``periods``).  A word cloud is laid out in every
    region that has words; all layouts are merged, recoloured and overlaid
    on the date legend produced by ``generate_date_circle``.

    Args:
        words_carac: mapping ``word -> record``.  The code reads
            ``record[0]`` (compared with the period bounds), ``record[1]``
            (used as the word frequency) and ``record[2]`` (compared with
            the resilience bounds).
        n: side length of the square canvas, in pixels.
        resiliences: sequence of resilience bounds.  Indices 0 to 4 are
            read; ring ``i`` keeps the words with
            ``resiliences[i + 1] <= record[2] < resiliences[i]`` and the
            centre keeps ``record[2] >= resiliences[0]``.
        periods: sequence of period bounds; sector ``i`` keeps the words
            with ``periods[i] <= record[0] < periods[i + 1]``.  Only eight
            sector shapes are defined, so at most nine bounds are supported.
        name: prefix of the output image and of the ``<name>_sections``
            directory.

    Side effects:
        Creates ``<name>_sections/``, calls ``write_section`` for the centre
        and for every non-empty sector, saves ``<name>.png`` and prints
        timing messages.
    """
    debut = datetime.now()
    data = np.zeros((n, n, 3), dtype=np.uint8)
    # First label joins the last and the first period bound.
    dates = [str(periods[-1]) + ' | ' + str(periods[0])]
    dates.extend(str(period) for period in periods[1:-1])
    angles = [0, 315.0, 270.0, 225.0, 180.0, 135.0, 90.0, 45.0]
    data = generate_date_circle(data, dates, angles, 0.95)
    data = 255 - data

    the_font = 'NotoSans-Regular.ttf'
    param_max_font_size = 0.03 * n
    param_relative_scaling = 0.3

    a, b = n / 2, n / 2
    y, x = np.ogrid[0:n, 0:n]
    # Squared distance of every pixel to the canvas centre, computed once.
    dist_sq = (x - a) * (x - a) + (y - b) * (y - b)

    # Centre disc.
    resilience = resiliences[0]
    r_1 = 0.45 * (n / 5)
    # ``np.int`` was removed from NumPy; the builtin ``int`` is the same type.
    the_mask = np.zeros((n, n), dtype=int)
    the_mask[dist_sq > r_1 * r_1] = 255
    the_frequencies = {}
    for word in words_carac:
        if words_carac[word][2] >= resilience:
            the_frequencies[word] = words_carac[word][1]
    var_1 = []
    var_2 = []
    if the_frequencies:
        wc = WordCloud(font_path=the_font,
                       background_color='white',
                       max_words=50000,
                       mask=the_mask,
                       stopwords=[],
                       prefer_horizontal=0.5,
                       width=the_mask.shape[0],
                       height=the_mask.shape[1],
                       relative_scaling=param_relative_scaling,
                       max_font_size=param_max_font_size)
        wc.generate_from_frequencies(the_frequencies)
        var_1 += wc.words_
        var_2 += wc.layout_
    os.makedirs(name + '_sections', exist_ok=True)
    write_section(var_2, words_carac,
                  name + '_sections/' + name + '_' + str(resilience) + '.txt')
    fin = datetime.now()
    print('resilience ' + str(resilience) + ' => done / ' + str(fin - debut))

    # Each sector is described by the two half-planes that are masked out.
    c_1 = [0 * y > (x - a), (y - b) > -(x - a)]
    c_2 = [(y - b) < -(x - a), (y - b) > x * 0]
    c_3 = [(y - b) < x * 0, (y - b) > (x - a)]
    c_4 = [(y - b) < (x - a), (x - a) < y * 0]
    c_5 = [(x - a) > y * 0, (y - b) < -(x - a)]
    c_6 = [(y - b) > -(x - a), (y - b) < x * 0]
    c_7 = [(y - b) > x * 0, (y - b) < (x - a)]
    c_8 = [(y - b) > (x - a), 0 * y < (x - a)]
    arretes = [c_1, c_2, c_3, c_4, c_5, c_6, c_7, c_8]

    for res_ind in range(4):
        resilience_sup = resiliences[res_ind]
        resilience_inf = resiliences[res_ind + 1]
        debut = datetime.now()

        # The ring depends only on the resilience band, not on the period.
        r_1 = 0.45 * (res_ind + 2) * (n / 5)
        r_2 = 0.45 * (res_ind + 1) * (n / 5)
        outside_ring = (dist_sq > r_1 * r_1) | (dist_sq < r_2 * r_2)

        for indice in range(len(periods) - 1):
            years_inf = periods[indice]
            years_sup = periods[indice + 1]

            the_mask = np.zeros((n, n), dtype=int)
            the_mask[outside_ring] = 255
            the_mask[arretes[indice][0]] = 255
            the_mask[arretes[indice][1]] = 255

            the_frequencies = {}
            for word in words_carac:
                carac = words_carac[word]
                res_bol = resilience_inf <= carac[2] < resilience_sup
                year_bol = years_inf <= carac[0] < years_sup
                if res_bol and year_bol:
                    the_frequencies[word] = carac[1]

            if the_frequencies:
                # NOTE(review): unlike the centre cloud, no relative_scaling
                # is passed here - confirm that this is intended.
                wc = WordCloud(font_path=the_font,
                               background_color='white',
                               max_words=50000,
                               mask=the_mask,
                               stopwords=[],
                               prefer_horizontal=0.5,
                               width=the_mask.shape[0],
                               height=the_mask.shape[1],
                               max_font_size=param_max_font_size)
                wc.generate_from_frequencies(the_frequencies)
                var_1 += wc.words_
                var_2 += wc.layout_
                write_section(
                    wc.layout_, words_carac, name + '_sections/' + name + '_' +
                    str(resilience_inf) + '_' + str(years_inf) + '.txt')
        fin = datetime.now()
        print('resilience ' + str(resilience_inf) + ' => done / ' +
              str(fin - debut))

    # Render the merged layout once, after all rings are placed: only the
    # final image is saved, so re-rendering after every ring was wasted work.
    wc_montre = WordCloud(font_path=the_font,
                          background_color='white',
                          width=n,
                          height=n)
    wc_montre.words_ = var_1
    wc_montre.layout_ = var_2
    color_func_apply = partial(color_func, 2000, words_carac)
    wc_montre.recolor(color_func=color_func_apply)

    # Overlay legend and cloud as a saturating sum of their inverted images.
    # A plain ``data_1 + data_2`` wraps around on uint8 arrays, which made
    # the former ``> 255`` clamp ineffective; limiting the second term to
    # the remaining headroom keeps the sum within 0..255.
    data_1 = 255 - data
    data_2 = 255 - wc_montre.to_array()
    data_3 = data_1 + np.minimum(data_2, 255 - data_1)
    data_3 = 255 - data_3
    Image.fromarray(data_3, 'RGB').save(name + '.png')
Esempio n. 5
0
def voxel(v,name):
    """Build the word-cloud page (``cloud.html``) for a single voxel.

    Reads row ``int(v)`` of ``app.df``, keeps its non-zero entries, generates
    a word cloud in which each concept key appears
    ``int(abs(value) * 1000)`` times, and passes the cloud (as base64 PNG
    data) together with colour-scale and layout settings to the template.

    Args:
        v: voxel index; converted with ``int``.
        name: passed to the template as ``region_name``.

    Returns:
        The result of ``render_template("cloud.html", ...)``.

    NOTE(review): ``cStringIO`` and ``str.encode("base64")`` exist only on
    Python 2 - confirm the target interpreter before porting.
    """
    voxel_idx = int(v)

    # Prepare variables
    regparams = app.df.loc[voxel_idx]

    # Generate a lookup by concept name
    lookup = get_lookup()

    # We are only interested in nonzero concepts
    regparams = pandas.DataFrame(regparams[regparams!=0])
    concepts = regparams.index.tolist()
    colors = random_colors(concepts)

    regparams["key"] = [lookup[x] for x in regparams.index]
    regparams["color"] = [colors[x] for x in regparams.index]
    regparams.columns = ['value', 'key', 'color']

    # Generate a word cloud image, take regression params into account.
    # Keys are joined with "_" so the cloud treats each one as a single word.
    scaled = (regparams['value'].abs()*1000).copy()
    tokens = []
    for concept, weight in scaled.iteritems():
        multiply_by = int(weight)
        key = regparams.loc[concept]['key'].replace(" ","_")
        tokens.extend([key] * multiply_by)

    text = " ".join(tokens)
    regparams = regparams.to_json(orient="records")

    # Min and max values for the color scale
    min_voxel = app.X.loc[:,voxel_idx].min()
    max_voxel = app.X.loc[:,voxel_idx].max()

    # We will let the user select a voxel location based on region
    regions = app.regions.to_dict(orient="records")

    wordcloud = WordCloud(max_font_size=100, width=app.width, height=app.height,
                          relative_scaling=1.0, background_color="white").generate(text)

    # Remove "_" in words
    wordcloud.words_ = [(tup[0].replace("_"," "), tup[1])
                        for tup in wordcloud.words_]
    wordcloud.layout_ = [((tup[0][0].replace("_"," "), tup[0][1]),
                          tup[1],
                          tup[2],
                          tup[3],
                          tup[4])
                         for tup in wordcloud.layout_]

    # Use a dedicated figure and always close it: pyplot's implicit current
    # figure would otherwise accumulate one image per request.
    fig = plt.figure()
    try:
        plt.imshow(wordcloud)
        plt.axis("off")
        sio = cStringIO.StringIO()
        plt.savefig(sio, format="png")
    finally:
        plt.close(fig)
    png_data = sio.getvalue().encode("base64").strip()

    return render_template("cloud.html",regparams=regparams,
                                        min=app.df.loc[voxel_idx].min(),
                                        max=app.df.loc[voxel_idx].max(),
                                        width=app.width,
                                        min_voxel=min_voxel,
                                        max_voxel=max_voxel,
                                        height=app.height,
                                        padding=app.padding,
                                        radius=app.radius,
                                        maxRadius=app.maxRadius,
                                        lookup=lookup,
                                        colors=colors,
                                        png_data=png_data,
                                        voxel=voxel_idx,
                                        regions=regions,
                                        region_name=name)