def load_examples(args, edgelist_path, G):
    """Create, or reuse from disk, the windowed examples built from walks on ``G``.

    Args:
        args: Object exposing ``.get``; the keys ``'n_walks'``, ``'walk_len'``
            and ``'window_size'`` are read from it.
        edgelist_path: Accepted for the caller's sake; this function never
            reads it.
        G: Forwarded unchanged to ``generate_walks``.

    Returns:
        A ``(example_pth, dict_pth)`` tuple of file paths. When both files
        already exist they are returned as-is; otherwise the examples and the
        dictionary are generated and handed to ``save_examples`` and
        ``save_dictionary`` first.
    """
    n_walks = args.get('n_walks')
    walk_len = args.get('walk_len')
    window_size = args.get('window_size')

    # Output file names encode the parameters so different runs do not collide.
    param_str = f'{n_walks}_walks_{walk_len}_walk_len_{window_size}_ws'
    example_pth = f'data/graph_examples_{param_str}.h5'
    dict_pth = f'data/graph_dictionary_{param_str}.gensim'

    # Both artefacts must be present for the cached result to be reused.
    already_built = os.path.isfile(example_pth) and os.path.isfile(dict_pth)
    if already_built:
        print(f'Loading examples from: {example_pth}')
        print(f'Loading dictionary from: {dict_pth}')
        return example_pth, dict_pth

    dictionary, walks = generate_walks(G, n_walks, walk_len)

    examples = []
    for walk in tqdm(walks, desc='Generating Examples:', total=len(walks)):
        for window in strided_windows(walk, window_size):
            center, context = window[0], window[1:]
            # Every token is mapped through the dictionary to an integer id;
            # the first token of the walk is stored with each example.
            examples.append([
                int(dictionary[walk[0]]),
                int(dictionary[center]),
                np.array([int(dictionary[token]) for token in context]),
            ])

    save_examples(example_pth, examples)
    save_dictionary(dict_pth, dictionary)
    return example_pth, dict_pth
def test_strided_windows2(self):
    """Windows of width 5 over ``np.arange(10)`` match ``self.arr10_5`` and alias the input."""
    source = np.arange(10)
    windows = utils.strided_windows(source, 5)
    reference = self.arr10_5.copy()
    self._assert_arrays_equal(reference, windows)

    # Write through the result: the change is only visible in the input
    # if the windows share memory with it.
    windows[0, 0] = 10
    self.assertEqual(10, source[0], "should make view rather than copy")
def load_examples(args, df_path):
    """Create, or reuse from disk, the windowed examples built from tokenized files.

    Args:
        args: Object exposing ``.get``; only the key ``'window_size'`` is read.
        df_path: Forwarded unchanged to ``tokenize_files``.

    Returns:
        A ``(example_pth, dict_pth)`` tuple of file paths. When both files
        already exist they are returned as-is; otherwise the examples and the
        dictionary are generated and handed to ``save_examples`` and
        ``save_dictionary`` first.
    """
    window_size = args.get('window_size')

    # Output file names encode the window size so different runs do not collide.
    param_str = f'{window_size}_ws'
    example_pth = f'data/lang_examples_{param_str}.h5'
    dict_pth = f'data/lang_dictionary_{param_str}.gensim'

    # Both artefacts must be present for the cached result to be reused.
    already_built = os.path.isfile(example_pth) and os.path.isfile(dict_pth)
    if already_built:
        print(f'Loading examples from: {example_pth}')
        print(f'Loading dictionary from: {dict_pth}')
        return example_pth, dict_pth

    dictionary, walks = tokenize_files(df_path)

    examples = []
    progress = tqdm(enumerate(walks), desc='Generating Examples:', total=len(walks))
    for doc_index, walk in progress:
        for window in strided_windows(walk, window_size):
            # First element is the centre, the remainder is its context; the
            # position of the walk within ``walks`` is stored alongside.
            examples.append([doc_index, window[0], window[1:]])

    save_examples(example_pth, examples)
    save_dictionary(dict_pth, dictionary)
    return example_pth, dict_pth
def test_strided_windows1(self):
    """Width-2 windows over ``range(5)`` are the four consecutive pairs."""
    windows = utils.strided_windows(range(5), 2)
    # Rows [0, 1], [1, 2], [2, 3], [3, 4].
    consecutive_pairs = np.array([[start, start + 1] for start in range(4)])
    self._assert_arrays_equal(consecutive_pairs, windows)
def test_strided_windows_window_size_equals_size(self):
    """A window as wide as the input yields one row equal to the input."""
    tokens = np.array(['this', 'is', 'test'], dtype='object')
    windows = utils.strided_windows(tokens, 3)
    # Wrap a copy of the input in a list to get the single-row expectation.
    single_row = np.array([tokens.copy()])
    self._assert_arrays_equal(single_row, windows)
def test_strided_windows_window_size_exceeds_size(self):
    """A window wider than the input is expected to give a ``(0, 0)`` array."""
    tokens = np.array(['this', 'is', 'test'], dtype='object')
    windows = utils.strided_windows(tokens, 4)
    no_windows = np.empty((0, 0))
    self._assert_arrays_equal(no_windows, windows)