def wordcloud_generation(skill_text, overall_skills, project_skills,
                         unique_id, name):
    """Render a skills word cloud and save it as a PNG under ``static/``.

    A word cloud is generated from ``skill_text``; every laid-out word is
    then recoloured with ``find_color(word, overall_skills, project_skills)``
    before the cloud is drawn with matplotlib.

    Args:
        skill_text: Text the cloud is generated from (stripped first).
        overall_skills: Forwarded to ``find_color``.
        project_skills: Forwarded to ``find_color``.
        unique_id: String fragment of the output file name.
        name: String fragment of the output file name.

    Returns:
        None. The image is written to
        ``'static/here' + unique_id + name + '.png'``.
    """
    wc = WordCloud(stopwords=None, max_words=200, max_font_size=100)
    wc.generate(skill_text.strip())

    # Keep the computed layout, but replace the colour of each word.
    wc.layout_ = [
        ((word, freq), fontsize, position, orientation,
         find_color(word, overall_skills, project_skills))
        for (word, freq), fontsize, position, orientation, _ in wc.layout_
    ]

    fig = plt.figure(figsize=(10, 10))
    try:
        plt.imshow(wc, interpolation='bilinear')
        plt.axis("off")
        plt.title(
            "Skills - (Skills in Project vs Overall Skills mentioned in Resume)\nRed - Skills in project, White - Overall Skills"
        )
        plt.tight_layout()
        plt.savefig('static/' + 'here' + unique_id + name + '.png', dpi=300)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leave one more figure in memory.
        plt.close(fig)
def _free_area_bounds(mask):
    """Return the inclusive bounding box of the zero-valued cells of ``mask``.

    The result is ``(row_min, row_max, col_min, col_max)``. A ``ValueError``
    is raised when ``mask`` contains no zero cell.
    """
    free = np.argwhere(mask == 0)
    rows, cols = free[:, 0], free[:, 1]
    return rows.min(), rows.max(), cols.min(), cols.max()


def _shift_layout(layout, first_offset, second_offset):
    """Translate every position of a WordCloud layout by the given offsets."""
    return [
        (entry, font_size,
         (position[0] + first_offset, position[1] + second_offset),
         orientation, color)
        for entry, font_size, position, orientation, color in layout
    ]


def make_chronocloud(words_carac, n, name, langage):
    """Build the 16-sector "chronocloud" image and its companion text dumps.

    The picture is an ``n`` x ``n`` canvas made of a date legend (drawn by
    ``generate_date_circle``), a central disc holding the words whose
    resilience is at least 150, and four concentric rings (resilience bands
    starting at 125, 100, 75 and 50) each cut into sixteen decade sectors
    (1840 to 1990).

    Args:
        words_carac: Mapping ``word -> record``. ``record[0]`` is compared
            with the decade bounds, ``record[1]`` is used as the word
            frequency, ``record[2]`` is compared with the resilience
            thresholds and ``record[3]`` is handed to ``color_func`` through
            the per-cloud colour table.
        n: Side of the square canvas, in pixels.
        name: Prefix of the three output files.
        langage: ``'Hebrew'`` or ``'Chinese_simplified'`` select a dedicated
            font; any other value uses ``NotoSans-Regular.ttf``.

    Side effects:
        Writes ``name + '_chronodata_words_alt.txt'``,
        ``name + '_chronodata_layout_alt.txt'`` and
        ``name + '_chronocloud.png'``; prints progress messages.

    NOTE(review): the statement nesting was rebuilt from a
    whitespace-collapsed source; confirm it against the upstream file.
    """
    data = np.zeros((n, n, 3), dtype=np.uint8)
    dates = [str(year) for year in range(1840, 1991, 10)]
    dates[0] = '2000 | ' + dates[0]
    angles = [
        0, 337.5, 315.0, 292.5, 270.0, 247.5, 225.0, 202.5, 180.0, 157.5,
        135.0, 112.5, 90.0, 67.5, 45.0, 22.5
    ]
    data = generate_date_circle(data, dates, angles, 0.95)
    data = 255 - data
    print('chronocloud legend: done')

    fonts = {
        'Hebrew': 'NotoSansHebrew-Regular.ttf',
        'Chinese_simplified': 'NotoSansCJKtc-Regular.otf',
    }
    the_font = fonts.get(langage, 'NotoSans-Regular.ttf')

    # Settings shared by every partial cloud (centre and sectors).
    default_params = {
        'font_path': the_font,
        'prefer_horizontal': 0.5,
        'background_color': 'white',
        'max_words': 5000,
        'stopwords': [],
        'relative_scaling': 0.5,
        'max_font_size': 0.03 * n,
    }

    a, b = n / 2, n / 2
    y, x = np.ogrid[0:n, 0:n]
    # Squared distance of every pixel to the centre, reused by all masks.
    dist_sq = (x - a) * (x - a) + (y - b) * (y - b)

    var_1, var_2 = [], []

    def place_words(blocked, frequencies, colors):
        """Lay ``frequencies`` out in the pixels where ``blocked`` is False."""
        the_mask = np.zeros((n, n), dtype=np.uint8)
        the_mask[blocked] = 255
        # Paint only inside the bounding rectangle of the free area: faster.
        x_min, x_max, y_min, y_max = _free_area_bounds(the_mask)
        the_real_mask = the_mask[x_min:(x_max + 1), y_min:(y_max + 1)]
        wc = WordCloud(mask=the_real_mask,
                       color_func=partial(color_func, colors),
                       **default_params)
        wc.generate_from_frequencies(frequencies)
        var_1.extend(wc.words_)
        # Positions are relative to the cropped mask; move them back onto
        # the full canvas.
        var_2.extend(_shift_layout(wc.layout_, x_min, y_min))

    # --- centre: a disc of radius r_1 ---
    resilience = 150
    r_1 = 0.45 * (7 - (resilience / 25)) * (n / 5)
    the_words = {}
    the_colors = {}
    for word in words_carac:
        carac = words_carac[word]
        if carac[2] >= resilience:
            the_words[word] = carac[1]
            try:
                the_colors[word] = carac[3]
            except (IndexError, KeyError):
                # Record without a colour entry: report it and carry on.
                print(carac)
    if the_words:
        place_words(dist_sq > r_1 * r_1, the_words, the_colors)
    print('chronocloud noyau: done')

    # --- sectors ---
    t = 0.002  # line ratio
    p1 = 2.41421356237
    p2 = 0.41421356237
    # model: Z1 (x - Z2 * a) + Z3 (y - Z4 * b) > 0
    c_1 = [[-1, 1 + t, 0, 1], [p1, 1, 1, 1]]
    c_2 = [[-p1, 1, -1, 1], [1, 1, 1, 1]]
    c_3 = [[-1, 1, -1, 1], [p2, 1, 1, 1]]
    c_4 = [[-p2, 1, -1, 1], [0, 1, 1, 1 - t]]
    c_5 = [[0, 1, -1, 1 + t], [-p2, 1, 1, 1]]
    c_6 = [[p2, 1, -1, 1], [-1, 1, 1, 1]]
    c_7 = [[1, 1, -1, 1], [-p1, 1, 1, 1]]
    c_8 = [[p1, 1, -1, 1], [-1, 1 + t, 0, 1]]
    c_9 = [[1, 1 - t, 0, 1], [-p1, 1, -1, 1]]
    c_10 = [[p1, 1, 1, 1], [-1, 1, -1, 1]]
    c_11 = [[1, 1, 1, 1], [-p2, 1, -1, 1]]
    c_12 = [[p2, 1, 1, 1], [0, 1, -1, 1 + t]]
    c_13 = [[0, 1, 1, 1 - t], [p2, 1, -1, 1]]
    c_14 = [[-p2, 1, 1, 1], [1, 1, -1, 1]]
    c_15 = [[-1, 1, 1, 1], [p1, 1, -1, 1]]
    c_16 = [[-p1, 1, 1, 1], [1, 1 - t, 0, 1]]
    arretes = [
        c_1, c_2, c_3, c_4, c_5, c_6, c_7, c_8, c_9, c_10, c_11, c_12, c_13,
        c_14, c_15, c_16
    ]

    # Go through each sector [resilience, year].
    for resilience in [125, 100, 75, 50]:
        r_1 = 0.45 * (7 - (resilience / 25)) * (n / 5)
        r_2 = 0.45 * (6 - (resilience / 25)) * (n / 5)
        # The ring depends only on the resilience band: compute it once.
        outside_ring = (dist_sq > r_1 * r_1) | (dist_sq < r_2 * r_2)

        for years_lim in range(1840, 1991, 10):
            the_words = {}
            the_colors = {}
            for word in words_carac:
                carac = words_carac[word]
                if (resilience <= carac[2] < resilience + 25
                        and years_lim <= carac[0] < years_lim + 10):
                    the_words[word] = carac[1]
                    the_colors[word] = carac[3]
            if not the_words:
                # Nothing to draw: skip the (costly) mask construction.
                continue

            indice = (years_lim - 1840) // 10
            blocked = outside_ring.copy()
            for z1, z2, z3, z4 in arretes[indice]:
                blocked |= (z1 * (x - z2 * a) + z3 * (y - z4 * b) > 0)
            place_words(blocked, the_words, the_colors)

        print('chronocloud R=' + str(resilience) + ': done')

    wc_montre = WordCloud(font_path=the_font,
                          background_color='white',
                          width=n,
                          height=n)
    wc_montre.words_ = var_1
    wc_montre.layout_ = var_2

    # Context managers close the files even if a write fails.
    with open(name + '_chronodata_words_alt.txt', 'w') as fichier:
        fichier.writelines(str(entry) + '\n' for entry in var_1)
    with open(name + '_chronodata_layout_alt.txt', 'w') as fichier:
        fichier.writelines(str(entry) + '\n' for entry in var_2)

    # Overlay legend and words by adding their "ink" (255 - pixel).
    # The sum is taken in a wider integer type: adding two uint8 arrays
    # wraps modulo 256, which would make the clamp to 255 a no-op.
    legend_ink = 255 - data
    words_ink = 255 - wc_montre.to_array()
    out_dtype = np.result_type(legend_ink, words_ink)
    ink = np.minimum(legend_ink.astype(np.int32) + words_ink, 255)
    data = (255 - ink).astype(out_dtype)
    image_from_np_2(data).save(name + '_chronocloud.png')
def voxel(v, name):
    """Render the ``cloud.html`` page for one voxel.

    The row ``app.df.loc[int(v)]`` is reduced to its nonzero entries, each
    entry is given a display key (via ``get_lookup()``) and a colour (via
    ``random_colors``), and a word cloud is drawn in which every key is
    repeated ``int(abs(value) * 1000)`` times.

    Args:
        v: Voxel index; converted with ``int``.
        name: Passed to the template as ``region_name``.

    Returns:
        The result of ``render_template("cloud.html", ...)``; ``png_data``
        is the base64 text of the rendered PNG.
    """
    # Local imports keep the block self-contained.
    import base64
    import io

    voxel_idx = int(v)

    # Prepare variables
    regparams = app.df.loc[voxel_idx]

    # Generate a lookup by concept name
    lookup = get_lookup()

    # We are only interested in nonzero concepts
    regparams = pandas.DataFrame(regparams[regparams != 0])
    concepts = regparams.index.tolist()
    colors = random_colors(concepts)
    regparams["key"] = [lookup[x] for x in regparams.index]
    regparams["color"] = [colors[x] for x in regparams.index]
    regparams.columns = ['value', 'key', 'color']

    # Generate a word cloud image, take regression params into account.
    # Spaces become "_" so that multi-word keys survive tokenisation.
    scaled = (regparams['value'].abs() * 1000).copy()
    tokens = []
    # Series.items() replaces iteritems(), which newer pandas removed.
    for concept, weight in scaled.items():
        token = regparams.loc[concept]['key'].replace(" ", "_")
        # extend() avoids rebuilding the whole list for every concept.
        tokens.extend([token] * int(weight))
    text = " ".join(tokens)

    regparams = regparams.to_json(orient="records")

    # Min and max values for the color scale
    min_voxel = app.X.loc[:, voxel_idx].min()
    max_voxel = app.X.loc[:, voxel_idx].max()

    # We will let the user select a voxel location based on region
    regions = app.regions.to_dict(orient="records")

    wordcloud = WordCloud(max_font_size=100,
                          width=app.width,
                          height=app.height,
                          relative_scaling=1.0,
                          background_color="white").generate(text)

    # Remove "_" in words. ``words_`` is a dict in recent wordcloud
    # releases and a list of (word, weight) pairs in older ones.
    if isinstance(wordcloud.words_, dict):
        wordcloud.words_ = {
            word.replace("_", " "): weight
            for word, weight in wordcloud.words_.items()
        }
    else:
        wordcloud.words_ = [(word.replace("_", " "), weight)
                            for word, weight in wordcloud.words_]
    wordcloud.layout_ = [
        ((tup[0][0].replace("_", " "), tup[0][1]), tup[1], tup[2], tup[3],
         tup[4]) for tup in wordcloud.layout_
    ]

    # Draw on a fresh figure and always close it, so that images do not
    # pile up on pyplot's implicit global figure from one call to the next.
    fig = plt.figure()
    try:
        plt.imshow(wordcloud)
        plt.axis("off")
        sio = io.BytesIO()
        plt.savefig(sio, format="png")
    finally:
        plt.close(fig)
    # PNG data is binary: encode it with the base64 module (the "base64"
    # str codec only exists on Python 2).
    png_data = base64.b64encode(sio.getvalue()).decode("ascii")

    return render_template("cloud.html",
                           regparams=regparams,
                           min=app.df.loc[voxel_idx].min(),
                           max=app.df.loc[voxel_idx].max(),
                           width=app.width,
                           min_voxel=min_voxel,
                           max_voxel=max_voxel,
                           height=app.height,
                           padding=app.padding,
                           radius=app.radius,
                           maxRadius=app.maxRadius,
                           lookup=lookup,
                           colors=colors,
                           png_data=png_data,
                           voxel=voxel_idx,
                           regions=regions,
                           region_name=name)
def make_chronocloud(words_carac, n, resiliences, periods, name):
    """Build the 8-sector "chronocloud" image and its per-section dumps.

    The picture is an ``n`` x ``n`` canvas made of a date legend (drawn by
    ``generate_date_circle``), a central disc holding the words whose
    resilience is at least ``resiliences[0]``, and four concentric rings
    (one per consecutive pair of ``resiliences``) each cut into angular
    sectors, one per consecutive pair of ``periods``.

    Args:
        words_carac: Mapping ``word -> record``. ``record[0]`` is compared
            with the period bounds, ``record[1]`` is used as the word
            frequency and ``record[2]`` is compared with the resilience
            bounds. The mapping is also handed to ``write_section`` and
            ``color_func``.
        n: Side of the square canvas, in pixels.
        resiliences: Sequence of thresholds; indices 0 to 4 are read, and
            each ring keeps ``resiliences[i + 1] <= value < resiliences[i]``.
        periods: Sequence of period bounds; sector ``i`` keeps
            ``periods[i] <= value < periods[i + 1]``. Only eight sector
            shapes are defined.
        name: Prefix of the output image and of the ``_sections`` directory.

    Side effects:
        Creates ``name + '_sections'``, writes one text file per non-empty
        section through ``write_section``, (re)writes ``name + '.png'``
        after every ring and prints timing messages.

    NOTE(review): the statement nesting was rebuilt from a
    whitespace-collapsed source; confirm it against the upstream file.
    """
    debut = datetime.now()
    data = np.zeros((n, n, 3), dtype=np.uint8)
    dates = [str(periods[-1]) + ' | ' + str(periods[0])]
    dates.extend(str(period) for period in periods[1:-1])
    angles = [0, 315.0, 270.0, 225.0, 180.0, 135.0, 90.0, 45.0]
    data = generate_date_circle(data, dates, angles, 0.95)
    data = 255 - data

    the_font = 'NotoSans-Regular.ttf'
    param_max_font_size = 0.03 * n
    param_relative_scaling = 0.3

    a, b = n / 2, n / 2
    y, x = np.ogrid[0:n, 0:n]
    # Squared distance of every pixel to the centre, reused by all masks.
    dist_sq = (x - a) * (x - a) + (y - b) * (y - b)

    # --- centre ---
    resilience = resiliences[0]
    r_1 = 0.45 * (n / 5)
    # ``np.int`` was removed from NumPy; the builtin ``int`` is the same
    # dtype. A scalar assignment replaces the ``[255] * len(...)`` lists.
    the_mask = np.zeros((n, n), dtype=int)
    the_mask[dist_sq > r_1 * r_1] = 255

    the_frequencies = {
        word: words_carac[word][1]
        for word in words_carac if words_carac[word][2] >= resilience
    }

    var_1 = []
    var_2 = []
    if the_frequencies:
        wc = WordCloud(font_path=the_font,
                       background_color='white',
                       max_words=50000,
                       mask=the_mask,
                       stopwords=[],
                       prefer_horizontal=0.5,
                       width=the_mask.shape[0],
                       height=the_mask.shape[1],
                       relative_scaling=param_relative_scaling,
                       max_font_size=param_max_font_size)
        wc.generate_from_frequencies(the_frequencies)
        var_1 += wc.words_
        var_2 += wc.layout_

    # Created unconditionally: the sector files below live here as well.
    os.makedirs(name + '_sections', exist_ok=True)
    write_section(
        var_2, words_carac,
        name + '_sections/' + name + '_' + str(resilience) + '.txt')
    fin = datetime.now()
    print('resilience ' + str(resilience) + ' => done / ' + str(fin - debut))

    # --- sectors: each entry is the pair of half-planes to mask out ---
    c_1 = [0 * y > (x - a), (y - b) > -(x - a)]
    c_2 = [(y - b) < -(x - a), (y - b) > x * 0]
    c_3 = [(y - b) < x * 0, (y - b) > (x - a)]
    c_4 = [(y - b) < (x - a), (x - a) < y * 0]
    c_5 = [(x - a) > y * 0, (y - b) < -(x - a)]
    c_6 = [(y - b) > -(x - a), (y - b) < x * 0]
    c_7 = [(y - b) > x * 0, (y - b) < (x - a)]
    c_8 = [(y - b) > (x - a), 0 * y < (x - a)]
    arretes = [c_1, c_2, c_3, c_4, c_5, c_6, c_7, c_8]

    # Loop invariants: the legend "ink" and the recolouring function.
    legend_ink = 255 - data
    color_func_apply = partial(color_func, 2000, words_carac)

    for res_ind in range(4):
        resilience_sup = resiliences[res_ind]
        resilience_inf = resiliences[res_ind + 1]
        debut = datetime.now()

        # The ring only depends on ``res_ind``: compute it once per ring.
        r_1 = 0.45 * (res_ind + 2) * (n / 5)
        r_2 = 0.45 * (res_ind + 1) * (n / 5)
        outside_ring = (dist_sq > r_1 * r_1) | (dist_sq < r_2 * r_2)

        for indice in range(len(periods[:-1])):
            years_inf = periods[indice]
            years_sup = periods[indice + 1]

            the_mask = np.zeros((n, n), dtype=int)
            the_mask[outside_ring
                     | arretes[indice][0]
                     | arretes[indice][1]] = 255

            the_frequencies = {}
            for word in words_carac:
                carac = words_carac[word]
                if (resilience_inf <= carac[2] < resilience_sup
                        and years_inf <= carac[0] < years_sup):
                    the_frequencies[word] = carac[1]

            if the_frequencies:
                wc = WordCloud(font_path=the_font,
                               background_color='white',
                               max_words=50000,
                               mask=the_mask,
                               stopwords=[],
                               prefer_horizontal=0.5,
                               width=the_mask.shape[0],
                               height=the_mask.shape[1],
                               max_font_size=param_max_font_size)
                wc.generate_from_frequencies(the_frequencies)
                var_1 += wc.words_
                var_2 += wc.layout_
                write_section(
                    wc.layout_, words_carac,
                    name + '_sections/' + name + '_' + str(resilience_inf) +
                    '_' + str(years_inf) + '.txt')

        # Render everything placed so far on a single canvas.
        wc_montre = WordCloud(font_path=the_font,
                              background_color='white',
                              width=n,
                              height=n)
        wc_montre.words_ = var_1
        wc_montre.layout_ = var_2
        wc_montre.recolor(color_func=color_func_apply)

        # Overlay legend and words by adding their "ink" (255 - pixel).
        # The sum is widened first: uint8 + uint8 wraps modulo 256, which
        # would make the clamp to 255 a no-op.
        words_ink = 255 - wc_montre.to_array()
        ink = np.minimum(legend_ink.astype(np.uint16) + words_ink, 255)
        data_3 = (255 - ink).astype(np.uint8)

        fin = datetime.now()
        print('resilience ' + str(resilience_inf) + ' => done / ' +
              str(fin - debut))
        Image.fromarray(data_3, 'RGB').save(name + '.png')
def voxel(v, name):
    """Render the ``cloud.html`` page for one voxel.

    The row ``app.df.loc[int(v)]`` is reduced to its nonzero entries, each
    entry is given a display key (via ``get_lookup()``) and a colour (via
    ``random_colors``), and a word cloud is drawn in which every key is
    repeated ``int(abs(value) * 1000)`` times.

    Args:
        v: Voxel index; converted with ``int``.
        name: Passed to the template as ``region_name``.

    Returns:
        The result of ``render_template("cloud.html", ...)``; ``png_data``
        is the base64 text of the rendered PNG.
    """
    # Local imports keep the block self-contained.
    import base64
    import io

    voxel_idx = int(v)

    # Prepare variables
    regparams = app.df.loc[voxel_idx]

    # Generate a lookup by concept name
    lookup = get_lookup()

    # We are only interested in nonzero concepts
    regparams = pandas.DataFrame(regparams[regparams != 0])
    concepts = regparams.index.tolist()
    colors = random_colors(concepts)
    regparams["key"] = [lookup[x] for x in regparams.index]
    regparams["color"] = [colors[x] for x in regparams.index]
    regparams.columns = ['value', 'key', 'color']

    # Generate a word cloud image, take regression params into account.
    # Spaces become "_" so that multi-word keys survive tokenisation.
    scaled = (regparams['value'].abs() * 1000).copy()
    tokens = []
    # Series.items() replaces iteritems(), which newer pandas removed.
    for concept, weight in scaled.items():
        token = regparams.loc[concept]['key'].replace(" ", "_")
        # extend() avoids rebuilding the whole list for every concept.
        tokens.extend([token] * int(weight))
    text = " ".join(tokens)

    regparams = regparams.to_json(orient="records")

    # Min and max values for the color scale
    min_voxel = app.X.loc[:, voxel_idx].min()
    max_voxel = app.X.loc[:, voxel_idx].max()

    # We will let the user select a voxel location based on region
    regions = app.regions.to_dict(orient="records")

    wordcloud = WordCloud(max_font_size=100,
                          width=app.width,
                          height=app.height,
                          relative_scaling=1.0,
                          background_color="white").generate(text)

    # Remove "_" in words. ``words_`` is a dict in recent wordcloud
    # releases and a list of (word, weight) pairs in older ones.
    if isinstance(wordcloud.words_, dict):
        wordcloud.words_ = {
            word.replace("_", " "): weight
            for word, weight in wordcloud.words_.items()
        }
    else:
        wordcloud.words_ = [(word.replace("_", " "), weight)
                            for word, weight in wordcloud.words_]
    wordcloud.layout_ = [
        ((tup[0][0].replace("_", " "), tup[0][1]), tup[1], tup[2], tup[3],
         tup[4]) for tup in wordcloud.layout_
    ]

    # Draw on a fresh figure and always close it, so that images do not
    # pile up on pyplot's implicit global figure from one call to the next.
    fig = plt.figure()
    try:
        plt.imshow(wordcloud)
        plt.axis("off")
        sio = io.BytesIO()
        plt.savefig(sio, format="png")
    finally:
        plt.close(fig)
    # PNG data is binary: encode it with the base64 module (the "base64"
    # str codec only exists on Python 2).
    png_data = base64.b64encode(sio.getvalue()).decode("ascii")

    return render_template("cloud.html",
                           regparams=regparams,
                           min=app.df.loc[voxel_idx].min(),
                           max=app.df.loc[voxel_idx].max(),
                           width=app.width,
                           min_voxel=min_voxel,
                           max_voxel=max_voxel,
                           height=app.height,
                           padding=app.padding,
                           radius=app.radius,
                           maxRadius=app.maxRadius,
                           lookup=lookup,
                           colors=colors,
                           png_data=png_data,
                           voxel=voxel_idx,
                           regions=regions,
                           region_name=name)