예제 #1
0
파일: utils.py 프로젝트: ddehueck/CrossWalk
def load_examples(args, edgelist_path, G):
    """Return the paths of the example and dictionary files for graph ``G``.

    The file names are built from the ``n_walks``, ``walk_len`` and
    ``window_size`` entries of ``args``. If both files already exist they
    are reused untouched; otherwise walks are generated from ``G``, turned
    into ``[global_id, center_id, context_ids]`` examples and written out
    via ``save_examples`` / ``save_dictionary``.

    Args:
        args: Object exposing ``.get`` for ``'n_walks'``, ``'walk_len'``
            and ``'window_size'``.
        edgelist_path: Not read by this function.
        G: Graph passed straight through to ``generate_walks``.

    Returns:
        Tuple ``(example_pth, dict_pth)`` with the two file paths.
    """
    n_walks = args.get('n_walks')
    walk_len = args.get('walk_len')
    window_size = args.get('window_size')

    # NOTE(review): the names depend only on the walk parameters, not on
    # ``G`` or ``edgelist_path``, so a different graph with the same
    # parameters picks up the existing files.
    suffix = f'{n_walks}_walks_{walk_len}_walk_len_{window_size}_ws'
    example_pth = f'data/graph_examples_{suffix}.h5'
    dict_pth = f'data/graph_dictionary_{suffix}.gensim'

    if os.path.isfile(example_pth) and os.path.isfile(dict_pth):
        # Both artefacts are already on disk: just report and reuse them.
        print(f'Loading examples from: {example_pth}')
        print(f'Loading dictionary from: {dict_pth}')
    else:
        dictionary, walks = generate_walks(G, n_walks, walk_len)

        def to_entity_id(token):
            # Walk entries are looked up in the dictionary and cast to int.
            return int(dictionary[token])

        examples = []
        for walk in tqdm(walks, desc='Generating Examples:', total=len(walks)):
            # One example per window: first node of the walk, first element
            # of the window, and the remaining window elements as an array.
            examples.extend(
                [
                    to_entity_id(walk[0]),
                    to_entity_id(window[0]),
                    np.array([to_entity_id(c) for c in window[1:]]),
                ]
                for window in strided_windows(walk, window_size)
            )

        save_examples(example_pth, examples)
        save_dictionary(dict_pth, dictionary)

    return example_pth, dict_pth
예제 #2
0
 def test_strided_windows2(self):
     """Size-5 windows over ``arange(10)`` match the fixture and share memory."""
     source = np.arange(10)
     windows = utils.strided_windows(source, 5)
     self._assert_arrays_equal(self.arr10_5.copy(), windows)

     # A write through the result must show up in the source array.
     windows[0, 0] = 10
     self.assertEqual(10, source[0], "should make view rather than copy")
예제 #3
0
 def test_strided_windows2(self):
     """Check the size-5 windows of ``arange(10)`` and that they are a view."""
     base = np.arange(10)
     result = utils.strided_windows(base, 5)
     self._assert_arrays_equal(self.arr10_5.copy(), result)

     # Mutating the first window cell is expected to change ``base`` too.
     result[0, 0] = 10
     self.assertEqual(10, base[0], "should make view rather than copy")
예제 #4
0
파일: utils.py 프로젝트: ddehueck/CrossWalk
def load_examples(args, df_path):
    """Return the paths of the language example and dictionary files.

    The file names are built from the ``window_size`` entry of ``args``.
    If both files already exist they are reused untouched; otherwise the
    documents at ``df_path`` are tokenized, each token sequence is cut into
    windows, and ``[sequence_index, center, context]`` examples are written
    out via ``save_examples`` / ``save_dictionary``.

    Args:
        args: Object exposing ``.get`` for ``'window_size'``.
        df_path: Passed straight through to ``tokenize_files``.

    Returns:
        Tuple ``(example_pth, dict_pth)`` with the two file paths.
    """
    window_size = args.get('window_size')

    # NOTE(review): the names depend only on ``window_size``, not on
    # ``df_path``, so a different input with the same window size picks up
    # the existing files.
    suffix = f'{window_size}_ws'
    example_pth = f'data/lang_examples_{suffix}.h5'
    dict_pth = f'data/lang_dictionary_{suffix}.gensim'

    if os.path.isfile(example_pth) and os.path.isfile(dict_pth):
        # Both artefacts are already on disk: just report and reuse them.
        print(f'Loading examples from: {example_pth}')
        print(f'Loading dictionary from: {dict_pth}')
    else:
        dictionary, walks = tokenize_files(df_path)

        examples = []
        progress = tqdm(enumerate(walks),
                        desc='Generating Examples:',
                        total=len(walks))
        for doc_idx, tokens in progress:
            # One example per window: position of the sequence in ``walks``,
            # first window element, and the rest of the window.
            examples.extend(
                [doc_idx, window[0], window[1:]]
                for window in strided_windows(tokens, window_size)
            )

        save_examples(example_pth, examples)
        save_dictionary(dict_pth, dictionary)

    return example_pth, dict_pth
예제 #5
0
 def test_strided_windows1(self):
     """Size-2 windows over ``range(5)`` are the four consecutive pairs."""
     windows = utils.strided_windows(range(5), 2)
     self._assert_arrays_equal(
         np.array([[0, 1], [1, 2], [2, 3], [3, 4]]),
         windows,
     )
예제 #6
0
 def test_strided_windows1(self):
     """``strided_windows(range(5), 2)`` yields ``[0, 1]`` through ``[3, 4]``."""
     result = utils.strided_windows(range(5), 2)
     pairs = np.array([[0, 1], [1, 2], [2, 3], [3, 4]])
     self._assert_arrays_equal(pairs, result)
예제 #7
0
 def test_strided_windows_window_size_equals_size(self):
     """A window as long as the input gives a single row equal to the input."""
     words = np.array(['this', 'is', 'test'], dtype='object')
     windows = utils.strided_windows(words, 3)
     self._assert_arrays_equal(np.array([words.copy()]), windows)
예제 #8
0
 def test_strided_windows_window_size_exceeds_size(self):
     """A window longer than the input is expected to give a ``(0, 0)`` array."""
     words = np.array(['this', 'is', 'test'], dtype='object')
     windows = utils.strided_windows(words, 4)
     self._assert_arrays_equal(np.ndarray((0, 0)), windows)
예제 #9
0
 def test_strided_windows_window_size_equals_size(self):
     """With window size 3 on a 3-element input, expect one row: the input."""
     tokens = np.array(['this', 'is', 'test'], dtype='object')
     result = utils.strided_windows(tokens, 3)
     single_row = np.array([tokens.copy()])
     self._assert_arrays_equal(single_row, result)
예제 #10
0
 def test_strided_windows_window_size_exceeds_size(self):
     """With window size 4 on a 3-element input, expect a ``(0, 0)`` array."""
     tokens = np.array(['this', 'is', 'test'], dtype='object')
     result = utils.strided_windows(tokens, 4)
     empty = np.ndarray((0, 0))
     self._assert_arrays_equal(empty, result)