('%s_vs_deepwalk_distcorr' % method): compute_distance_correlation(topology_weights[method], topology_weights['deepwalk'], tokens, unattacked_indx) }) exp_result.update({ ('%s_vs_glove_distcorr' % method): compute_distance_correlation(topology_weights[method], topology_weights['glove'], tokens, unattacked_indx) }) results.append(exp_result) # Compute cooccurrences (cooccurrence_list, index_vocab_list, vocab_index_lookup, tokenized_cooccurrences) = count_cooccurrences(walks_prime, 5) # Compute mrr curves for method in topology_weights: normalize_rows = False normalize_cols = False exp_result.update({ ('%s_mrr_curve_full' % method): [ compute_mrr_curve(topology_weights[method], tokenized_cooccurrences, tokens, normalize_rows=normalize_rows, normalize_cols=normalize_cols) ] }) exp_result.update({
def __init__(self, vector_size, vocab_filename=None, covariate_size=0,
             random_seed=12345, init_weight_dir=None, random_walks=None,
             covariate_data=None, window_size=5):
    """Initializes the data reading and model variables.

    Args:
      vector_size: size of the word vectors.
      vocab_filename: filename for getting word tokens. The first
        whitespace-separated field of every non-blank line is taken as a
        token; blank lines are skipped.
      covariate_size: size of the covariate embedding dimension
      random_seed: seed the initialization generator
      init_weight_dir: directory to pull initial weights from. defaults to a
        uniform initializer if none.
      random_walks: a list of tokenized sentences
      covariate_data: a keyed list of float lists, where each key identifies a
        token in the corpus, and each float list is a row of covariate data
      window_size: window size to use for cooccurrence counting, if needed

    Raises:
      KeyError: if covariate_data is given but has no entry for one of the
        tokens.
    """
    print('setting up basic stuff...')
    # Get word tokens
    self._vector_size = vector_size
    self._covariate_size = covariate_size
    self._tokens = []
    self._vocab_index_lookup = None
    if vocab_filename:
        with open(vocab_filename, 'r') as f:
            for line in f:
                fields = line.split()
                # A blank line (e.g. a trailing newline at the end of the
                # file) carries no token; skip it instead of failing on
                # fields[0].
                if fields:
                    self._tokens.append(fields[0])
        # Map each token to its position in the token list.
        self._vocab_index_lookup = {
            token: index for index, token in enumerate(self._tokens)
        }
    self._cooccurrences = None
    self._cooccurrence_dict = None
    print('loading or computing co-occurrences...')
    if random_walks:
        # The token list and lookup are replaced by whatever the counter
        # returns; any lookup built from the vocab file is passed in as a
        # seed. NOTE(review): exact semantics of the returned values live in
        # glove_util.count_cooccurrences -- confirm there.
        (self._cooccurrences, self._tokens, self._vocab_index_lookup,
         self._cooccurrence_dict) = glove_util.count_cooccurrences(
             random_walks, window_size, self._vocab_index_lookup)
    self._vocab_size = len(self._tokens)

    # Get covariate data
    print('setting other placeholders...')
    # Always define the attribute so that later reads on a covariate-free
    # model see None rather than raising AttributeError.
    self._covariate_data = None
    if covariate_data is not None:
        # One row per token, in token order.
        self._covariate_data = np.array(
            [covariate_data[t] for t in self._tokens])

    # Placeholders for parameter tensors and other trackers
    io_dict = {'input': None, 'outpt': None}
    self._word = copy.deepcopy(io_dict)
    self._bias = copy.deepcopy(io_dict)
    self._iter = 0
    self._sum_cost = 0
    self._sum_adv_cost_g = 0
    self._sum_adv_cost_d = 0
    self._random_seed = random_seed
    self._init_weight_dir = init_weight_dir

    # Pointers to variables needed for covariate model
    self._cvrt = copy.deepcopy(io_dict)
    self._cvrt_transformation = copy.deepcopy(io_dict)

    # Initialize the cooccurrence read format: each record is two ints
    # followed by one double ('iid').
    self._cooccurrence_fmt = 'iid'
    self._cooccurrence_fmt_length = struct.calcsize(self._cooccurrence_fmt)
    self._struct_unpack = struct.Struct(self._cooccurrence_fmt).unpack_from