def get_grid(grid_size, data_sample, grid, nr_words):
    """Attach every co-occurrence word to a blob and place all words on the final grid.

    The per-letter co-occurrence files (``_a.txt`` ... ``_z.txt`` below
    ``input_directory_cooc``) are read line by line.  Each line is a
    ``;``-separated record whose first field is the word.  Words that are not
    already blob words (``grid[2]``) are attached to the blob chosen by
    ``add_to_sim_word``; words whose record has no further fields are reported
    and subtracted from ``nr_words``.

    Afterwards the words are laid out according to the module-level
    ``space_to_grid_type``:

    * ``"new"``: ``puzzle_grid.space_to_grid`` produces the assignment directly.
    * ``"balls"`` / ``"stripy"``: the blob positions are rescaled into a larger
      square, the attached words are scattered randomly around their blob and
      ``space_to_grid_iterative`` snaps the points to grid cells.

    Args:
        grid_size: Side length of the existing blob grid; used for rescaling.
        data_sample: Passed through to ``add_to_sim_word``; the local
            reference is dropped once the files have been read.
        grid: Indexable with four entries.  ``grid[0]`` / ``grid[1]`` hold the
            blob coordinates (presumably row / column -- confirm against the
            producer), ``grid[2]`` the blob words and ``grid[3]`` one list of
            attached words per blob (appended to in place).
        nr_words: Expected total number of words to place.

    Returns:
        Tuple ``(new_grid_size, nr_words)`` with the side length of the
        resulting grid and the corrected word count.

    Raises:
        ValueError: If ``space_to_grid_type`` is not ``"new"``, ``"balls"`` or
            ``"stripy"``.

    Side effects:
        Fills the module-level ``grid_f``, ``global_index`` and ``global_name``
        and writes the blob file, ``color_file.txt`` and several PDF plots to
        ``output_directory``.
    """
    input_template = input_directory_cooc + r"\complete_cooc\_"
    non_blob_word_counter = 0
    word_counter = 0

    print("at file")
    for letter in "abcdefghijklmnopqrstuvwxyz":
        try:
            print(letter)
            # ``with`` guarantees the handle is released even if parsing fails.
            with open(input_template + letter + ".txt", 'r') as f:
                # line_nr is 0-based; it is only used for the diagnostic below.
                for line_nr, line in enumerate(f):
                    line = line.replace(";\n", "")
                    line = line.split(";")
                    word = line[0]
                    word_counter += 1
                    if word not in grid[2]:
                        del line[0]
                        if not line:
                            # The word has no co-occurrence entries, so it
                            # cannot be attached to a blob and is dropped.
                            print(word, "at line", line_nr, "in file", letter)
                            nr_words -= 1
                        else:
                            non_blob_word_counter += 1
                            best_index = add_to_sim_word(word, line, data_sample, grid)
                            grid[3][best_index].append(word)
        except IOError:
            print("file", letter, "not found")

    print("\n New nr words:", nr_words)
    print("nr non blob words actually encountered", non_blob_word_counter,
          "all actually seen words", word_counter)
    del data_sample

    colors = get_colors()
    write_blob_file(grid)

    if space_to_grid_type == "new":
        assignment = puzzle_grid.space_to_grid(grid, colors, nr_words, output_directory, grid_size)
        new_grid_size = math.ceil(math.sqrt(nr_words))
        index = 0
        for i in range(new_grid_size):
            for j in range(new_grid_size):
                cell = assignment[i][j]
                if cell is not None:
                    grid_f[i][j] = GridElem(index, [i, j], cell.name, cell.color, cell.blob_nr)
                    global_index[index] = grid_f[i][j]
                    global_name[grid_f[i][j].name] = grid_f[i][j]
                    index += 1

    elif space_to_grid_type == "balls" or space_to_grid_type == "stripy":
        enlarge = 5
        new_grid_size = math.ceil(math.sqrt(nr_words)) + enlarge
        ratio = new_grid_size / float(grid_size)
        ratio_resize = ratio / scale_factor
        # Centre the rescaled blob grid inside the enlarged grid.
        shift_resize = (new_grid_size - grid_size * ratio_resize) / 2 + ratio_resize / 2
        print("shift", shift_resize)

        data_space = np.zeros((nr_words, 2))
        index = 0
        for i in range(len(grid[0])):
            x = grid[1][i]
            y = grid_size - 1 - grid[0][i]
            new_pos = np.array([x, y]) * ratio_resize + shift_resize
            # The blob word itself sits exactly on the rescaled blob position.
            data_space[index, :] = new_pos
            global_index[index] = [grid[2][i], i]
            assigned = len(grid[3][i])
            index += 1
            # Attached words are jittered uniformly within one rescaled cell
            # around their blob.
            random_dist = np.random.rand(assigned, 2)
            data_space[index:index + assigned, :] = (random_dist - 0.5) * ratio_resize + new_pos
            for elem in grid[3][i]:
                global_index[index] = [elem, i]
                index += 1

        print("data space shape", data_space.shape)
        print("last index = ", index - 1)

        used_marker = "o"
        if nr_words > 1000:
            used_marker = "."

        # One colour per word, derived from the blob the word belongs to.
        coloring = []
        color_index = {}
        with open(output_directory + r"\color_file.txt", "w") as color_file:
            print("COLOR FILE")
            for i in range(nr_words):
                word_color = colors[global_index[i][1] % len(colors)]
                coloring.append(word_color)
                color_index[i] = word_color
                color_file.write(str(i) + ";" + str(word_color) + "\n")

        # Sanity plot showing only the first and the last data point.
        image_name = output_directory + r"\test_grid_coloring.pdf"
        fig = plt.figure()
        plt.plot(data_space[0, 0], data_space[0, 1], c=coloring[0], marker='o')
        plt.text(data_space[0, 0] + 1, data_space[0, 1], grid[2][0])
        n = data_space.shape[0] - 1
        plt.plot(data_space[n, 0], data_space[n, 1], c=coloring[n], marker='o')
        plt.text(data_space[n, 0] + 1, data_space[n, 1], grid[2][len(grid[2]) - 1])
        plt.axis([-1, new_grid_size + 1, -1, new_grid_size + 1])
        plt.title("BLOB TEST")
        fig.savefig(image_name, bbox_inches='tight')
        plt.close()

        # Scatter plot of all points before they are snapped to the grid.
        image_name = output_directory + r"\stg_init_plot_blobColoring.pdf"
        fig = plt.figure(figsize=(figure_size, figure_size))
        prop_plot = plt.scatter(data_space[:, 0], data_space[:, 1], c=coloring, marker=used_marker)
        if nr_words > 1000:
            prop_plot.set_edgecolor("none")
        plt.axis([-1, new_grid_size + 1, -1, new_grid_size + 1])
        plt.title("Initial blob coloring")
        fig.savefig(image_name, bbox_inches='tight')
        plt.close()

        print("new grid size", new_grid_size)
        # NOTE(review): old_grid_size is not defined in this function; it must
        # be a module-level name, otherwise this line raises NameError. Confirm
        # whether grid_size was intended.
        print("old grid size", old_grid_size)

        # Raw string: "\i" is not a valid escape sequence.
        intermediate_grids = output_directory + r"\intermediate_grids"
        if not os.path.exists(intermediate_grids):
            os.makedirs(intermediate_grids)
            print("directory made")

        if space_to_grid_type == "stripy":
            assignment, _ = stg.space_to_grid_iterative(
                data_space, intermediate_grids, log_memory,
                with_figures=False,
                blob_nr_keeper=stg.TypeKeeper(color_index),
                scale=False,
                grid_enlarge=enlarge)
        else:
            assignment, _ = stg_nice_try.space_to_grid_iterative(
                data_space, intermediate_grids, log_memory,
                with_figures=False,
                blob_nr_keeper=stg.TypeKeeper(color_index),
                scale=False)
        del data_space

        print("init final grid and make figure of grid")
        image_name = output_directory + r"\stg_result_plot_blobColoring.pdf"
        fig = plt.figure(figsize=(figure_size, figure_size))
        for elem in assignment:
            # elem is indexed as (x, y, data index); the y axis is flipped to
            # obtain the row in grid_f.
            row = int(new_grid_size) - elem[1] - 1
            column = elem[0]
            grid_f[row][column] = GridElem(elem[2], [row, column],
                                           global_index[elem[2]][0],
                                           color_index[elem[2]],
                                           global_index[elem[2]][1])
            # From here on global_index maps the data index to the GridElem
            # instead of the temporary [word, blob] pair.
            global_index[elem[2]] = grid_f[row][column]
            global_name[grid_f[row][column].name] = grid_f[row][column]
            prop_plot = plt.scatter(elem[0], elem[1], c=color_index[elem[2]], marker=used_marker)
            if nr_words > 1000:
                prop_plot.set_edgecolor("none")
        plt.axis([-1, new_grid_size, -1, new_grid_size])
        plt.title("Resulting blob coloring")
        fig.savefig(image_name, bbox_inches='tight')
        plt.close()

    else:
        # Without this guard the function would fail later with an opaque
        # UnboundLocalError on new_grid_size.
        raise ValueError("unknown space_to_grid_type: %r" % (space_to_grid_type,))

    print("Initial grid returns")
    return new_grid_size, nr_words
def space_to_grid(reduced_2):
    """Run the iterative space-to-grid assignment on ``reduced_2``.

    Delegates to ``stg.space_to_grid_iterative``, passing the module-level
    ``result_path`` as second argument, and returns its result unchanged.
    """
    grid_result = stg.space_to_grid_iterative(reduced_2, result_path)
    return grid_result
def get_grid(grid_size, data_sample, grid, nr_words):
    """Attach every co-occurrence word to a blob and place all words on the final grid.

    Reads the per-letter co-occurrence files (``_a.txt`` ... ``_z.txt``), where
    each line is a ``;``-separated record starting with the word.  Words that
    are not blob words (``grid[2]``) are attached to the blob returned by
    ``add_to_sim_word``; words without any further fields are reported and
    subtracted from ``nr_words``.  The blob positions are then rescaled to a
    grid of side ``ceil(sqrt(nr_words))``, attached words are jittered around
    their blob, and ``stg.space_to_grid_iterative`` snaps the points to cells.

    Args:
        grid_size: Side length of the existing blob grid; used for rescaling.
        data_sample: Passed through to ``add_to_sim_word``; the local
            reference is dropped once the files have been read.
        grid: Indexable with four entries: ``grid[0]`` / ``grid[1]`` blob
            coordinates, ``grid[2]`` blob words, ``grid[3]`` one list of
            attached words per blob (appended to in place).
        nr_words: Expected total number of words to place.

    Returns:
        Tuple ``(new_grid_size, nr_words)``.

    Side effects:
        Fills the module-level ``grid_f``, ``global_index`` and ``global_name``
        and writes ``blob_file.txt`` plus two PDF plots to ``output_directory``.
    """
    input_template = input_directory_cooc + r"\complete_cooc\_"
    if data_case_name == r"\limit1000_freq1_small_sample":
        # The small sample reuses the co-occurrence files of the full data case.
        input_template = r"D:\Users\Lydia\results word cooc\limit1000_freq1\complete_cooc\_"

    # Progress is printed on a single line: "at file a b c ...".
    print("at file", end=" ")
    for letter in "abcdefghijklmnopqrstuvwxyz":
        try:
            print(letter, end=" ")
            # ``with`` guarantees the handle is released even if parsing fails.
            with open(input_template + letter + ".txt", 'r') as f:
                # line_nr is 0-based; it is only used for the diagnostic below.
                for line_nr, line in enumerate(f):
                    line = line.replace(";\n", "")
                    line = line.split(";")
                    word = line[0]
                    if word not in grid[2]:
                        del line[0]
                        if not line:
                            # No co-occurrence entries: the word cannot be
                            # attached to a blob and is dropped.
                            print(word, "at line", line_nr, "in file", letter)
                            nr_words -= 1
                        else:
                            best_index = add_to_sim_word(word, line, data_sample, grid)
                            grid[3][best_index].append(word)
        except IOError:
            # The letter is echoed a second time when its file cannot be read.
            print(letter, end=" ")
    print("\n New nr words:", nr_words)
    del data_sample

    new_grid_size = math.ceil(math.sqrt(nr_words))
    ratio = new_grid_size / float(grid_size)
    ratio_resize = ratio / 1.2
    # The same shift is applied on both axes.
    shift_resize = (new_grid_size - grid_size * ratio_resize) / 2 + ratio_resize

    data_space = np.zeros((nr_words, 2))
    index = 0
    with open(output_directory + r"\blob_file.txt", "w") as blob_file:
        for i in range(len(grid[0])):
            new_pos = np.array([
                round(grid[0][i] * ratio_resize + shift_resize),
                round(grid[1][i] * ratio_resize + shift_resize),
            ])
            # The blob word itself sits exactly on the rescaled blob position.
            data_space[index, :] = new_pos
            global_index[index] = [grid[2][i], i]
            index += 1
            assigned = len(grid[3][i])
            # Attached words are jittered uniformly within one rescaled cell
            # around their blob.
            random_dist = np.random.rand(assigned, 2)
            data_space[index:index + assigned, :] = (random_dist - 0.5) * ratio_resize + new_pos
            # One line per blob: "<blob nr> <blob word> <count> <word> ...".
            blob_file.write(str(i) + " " + grid[2][i] + " " + str(assigned))
            for elem in grid[3][i]:
                global_index[index] = [elem, i]
                index += 1
                blob_file.write(" " + elem)
            blob_file.write("\n")

    print("data space shape", data_space.shape)
    print("last index = ", index - 1)

    used_marker = "o"
    if nr_words > 1000:
        used_marker = "."

    # One colour per word, derived from the blob the word belongs to.
    colors = get_colors()
    coloring = []
    color_index = {}
    for i in range(nr_words):
        word_color = colors[global_index[i][1] % len(colors)]
        coloring.append(word_color)
        color_index[i] = word_color

    # Scatter plot of all points before they are snapped to the grid; column 0
    # is plotted flipped on the vertical axis.
    image_name = output_directory + r"\stg_init_plot_blobColoring.pdf"
    fig = plt.figure(figsize=(figure_size, figure_size))
    prop_plot = plt.scatter(data_space[:, 1], new_grid_size - 1 - data_space[:, 0],
                            c=coloring, marker=used_marker)
    if nr_words > 1000:
        prop_plot.set_edgecolor("none")
    plt.axis([-1, new_grid_size + 1, -1, new_grid_size + 1])
    plt.title("Initial blob coloring")
    fig.savefig(image_name, bbox_inches='tight')
    plt.close()

    print("new grid size", new_grid_size)
    # Raw string: "\i" is not a valid escape sequence.
    intermediate_grids = output_directory + r"\intermediate_grids"
    if not os.path.exists(intermediate_grids):
        os.makedirs(intermediate_grids)
        print("directory made")

    assignment, _ = stg.space_to_grid_iterative(
        data_space, intermediate_grids,
        with_figures=False,
        blob_nr_keeper=stg.TypeKeeper(color_index))
    del data_space

    print("init final grid and make figure of grid")
    image_name = output_directory + r"\stg_result_plot_blobColoring.pdf"
    fig = plt.figure(figsize=(figure_size, figure_size))
    for elem in assignment:
        # elem is indexed as (row, column, data index).
        grid_f[elem[0]][elem[1]] = GridElem(elem[2], elem[0:2],
                                            global_index[elem[2]][0],
                                            color_index[elem[2]],
                                            global_index[elem[2]][1])
        # From here on global_index maps the data index to the GridElem
        # instead of the temporary [word, blob] pair.
        global_index[elem[2]] = grid_f[elem[0]][elem[1]]
        global_name[grid_f[elem[0]][elem[1]].name] = grid_f[elem[0]][elem[1]]
        prop_plot = plt.scatter(elem[1], new_grid_size - 1 - elem[0],
                                c=color_index[elem[2]], marker=used_marker)
        if nr_words > 1000:
            prop_plot.set_edgecolor("none")
    plt.axis([-1, new_grid_size, -1, new_grid_size])
    plt.title("Resulting blob coloring")
    fig.savefig(image_name, bbox_inches='tight')
    plt.close()
    return new_grid_size, nr_words
def get_grid(grid_size, data_sample, grid, nr_words):
    """Attach every co-occurrence word to a blob and place all words on the final grid.

    Reads the per-letter co-occurrence files (``_a.txt`` ... ``_z.txt``), where
    each line is a ``;``-separated record starting with the word.  Words that
    are not blob words (``grid[2]``) are attached to the blob returned by
    ``add_to_sim_word``; words without any further fields are reported and
    subtracted from ``nr_words``.  The blob positions are then rescaled to a
    grid of side ``ceil(sqrt(nr_words))``, attached words are jittered around
    their blob, and ``stg.space_to_grid_iterative`` snaps the points to cells.

    Unlike the sibling variants of this function, this one does not register
    the resulting elements in ``global_name``.

    Args:
        grid_size: Side length of the existing blob grid; used for rescaling.
        data_sample: Passed through to ``add_to_sim_word``; the local
            reference is dropped once the files have been read.
        grid: Indexable with four entries: ``grid[0]`` / ``grid[1]`` blob
            coordinates, ``grid[2]`` blob words, ``grid[3]`` one list of
            attached words per blob (appended to in place).
        nr_words: Expected total number of words to place.

    Returns:
        Tuple ``(new_grid_size, nr_words)``.

    Side effects:
        Fills the module-level ``grid_f`` and ``global_index`` and writes
        ``blob_file.txt`` plus two PDF plots to ``output_directory``.
    """
    input_template = input_directory_cooc + r"\complete_cooc\_"
    if data_case_name == r"\limit1000_freq1_small_sample":
        # The small sample reuses the co-occurrence files of the full data case.
        input_template = r"D:\Users\Lydia\results word cooc\limit1000_freq1\complete_cooc\_"

    # Progress is printed on a single line: "at file a b c ...".
    print("at file", end=" ")
    for letter in "abcdefghijklmnopqrstuvwxyz":
        try:
            print(letter, end=" ")
            # ``with`` guarantees the handle is released even if parsing fails.
            with open(input_template + letter + ".txt", 'r') as f:
                # line_nr is 0-based; it is only used for the diagnostic below.
                for line_nr, line in enumerate(f):
                    fields = line.replace(";\n", "").split(";")
                    word = fields[0]
                    if word in grid[2]:
                        # Blob words already have their place.
                        continue
                    del fields[0]
                    if not fields:
                        # No co-occurrence entries: the word cannot be
                        # attached to a blob and is dropped.
                        print(word, "at line", line_nr, "in file", letter)
                        nr_words -= 1
                    else:
                        best_index = add_to_sim_word(word, fields, data_sample, grid)
                        grid[3][best_index].append(word)
        except IOError:
            # The letter is echoed a second time when its file cannot be read.
            print(letter, end=" ")
    print("\n New nr words:", nr_words)
    del data_sample

    new_grid_size = math.ceil(math.sqrt(nr_words))
    ratio = new_grid_size / float(grid_size)
    ratio_resize = ratio / 1.2
    # The same shift is applied on both axes.
    shift_resize = (new_grid_size - grid_size * ratio_resize) / 2 + ratio_resize

    data_space = np.zeros((nr_words, 2))
    index = 0
    with open(output_directory + r"\blob_file.txt", "w") as blob_file:
        for i in range(len(grid[0])):
            new_pos = np.array([
                round(grid[0][i] * ratio_resize + shift_resize),
                round(grid[1][i] * ratio_resize + shift_resize),
            ])
            # The blob word itself sits exactly on the rescaled blob position.
            data_space[index, :] = new_pos
            global_index[index] = [grid[2][i], i]
            index += 1
            assigned = len(grid[3][i])
            # Attached words are jittered uniformly within one rescaled cell
            # around their blob.
            random_dist = np.random.rand(assigned, 2)
            data_space[index:index + assigned, :] = (random_dist - 0.5) * ratio_resize + new_pos
            # One line per blob: "<blob nr> <blob word> <count> <word> ...".
            blob_file.write(str(i) + " " + grid[2][i] + " " + str(assigned))
            for elem in grid[3][i]:
                global_index[index] = [elem, i]
                index += 1
                blob_file.write(" " + elem)
            blob_file.write("\n")

    print("data space shape", data_space.shape)
    print("last index = ", index - 1)

    used_marker = "o"
    if nr_words > 1000:
        used_marker = "."

    # One colour per word, derived from the blob the word belongs to.
    colors = get_colors()
    coloring = []
    color_index = {}
    for i in range(nr_words):
        word_color = colors[global_index[i][1] % len(colors)]
        coloring.append(word_color)
        color_index[i] = word_color

    # Scatter plot of all points before they are snapped to the grid; column 0
    # is plotted flipped on the vertical axis.
    image_name = output_directory + r"\stg_init_plot_blobColoring.pdf"
    fig = plt.figure(figsize=(figure_size, figure_size))
    prop_plot = plt.scatter(data_space[:, 1], new_grid_size - 1 - data_space[:, 0],
                            c=coloring, marker=used_marker)
    if nr_words > 1000:
        prop_plot.set_edgecolor("none")
    plt.axis([-1, new_grid_size + 1, -1, new_grid_size + 1])
    plt.title("Initial blob coloring")
    fig.savefig(image_name, bbox_inches='tight')
    plt.close()

    print("new grid size", new_grid_size)
    # Raw string: "\i" is not a valid escape sequence.
    intermediate_grids = output_directory + r"\intermediate_grids"
    if not os.path.exists(intermediate_grids):
        os.makedirs(intermediate_grids)
        print("directory made")

    assignment, _ = stg.space_to_grid_iterative(
        data_space, intermediate_grids,
        with_figures=False,
        blob_nr_keeper=stg.TypeKeeper(color_index))
    del data_space

    print("init final grid and make figure of grid")
    image_name = output_directory + r"\stg_result_plot_blobColoring.pdf"
    fig = plt.figure(figsize=(figure_size, figure_size))
    for elem in assignment:
        # elem is indexed as (row, column, data index).
        grid_f[elem[0]][elem[1]] = GridElem(elem[2], elem[0:2],
                                            global_index[elem[2]][0],
                                            color_index[elem[2]],
                                            global_index[elem[2]][1])
        # From here on global_index maps the data index to the GridElem
        # instead of the temporary [word, blob] pair.
        global_index[elem[2]] = grid_f[elem[0]][elem[1]]
        prop_plot = plt.scatter(elem[1], new_grid_size - 1 - elem[0],
                                c=color_index[elem[2]], marker=used_marker)
        if nr_words > 1000:
            prop_plot.set_edgecolor("none")
    plt.axis([-1, new_grid_size, -1, new_grid_size])
    plt.title("Resulting blob coloring")
    fig.savefig(image_name, bbox_inches='tight')
    plt.close()
    return new_grid_size, nr_words