def initialize_weights():
    """Populate the global weight-index maps and load saved weights.

    Assigns one consecutive integer index per answer in ``A``, per unique
    path read from ``uniq_paths_file``, and per unique word read from
    ``uniq_words_file``, then loads previously saved weights via ``qae``.

    Side effects: mutates the module globals ``WA``, ``W``,
    ``curr_W_index``, and ``paths``; prints progress/summary lines.
    """
    global WA, W, curr_W_index
    print('counting words')
    curr_W_index = 0
    # Answers occupy the lowest index range of the WA map.
    for a in A:
        WA[a] = curr_W_index
        curr_W_index += 1
    # Paths share the WA map; also record each path for later lookup.
    # `with` fixes the original code's never-closed file handle.
    with open(uniq_paths_file) as uniq_path:
        for p in uniq_path:
            p = p.strip('\n')
            WA[p] = curr_W_index
            curr_W_index += 1
            paths.append(p)
    # Words get their own map W; skip duplicates so each word maps to
    # exactly one index.
    with open(uniq_words_file) as uniq_word:
        for q in uniq_word:
            for word in q.split():
                if word not in W:
                    W[word] = curr_W_index
                    curr_W_index += 1
    print('initializing weights')
    ###THIS ONE MAKES NEW RANDOM WEIGHTS
    #stdout.flush()
    #qae.initialize(max_W_index, features)
    qae.loadweights()
    print("curr_W_index, length W, length WA, features ", curr_W_index, len(W), len(WA), features)
    print('%s Words' % len(W))
    print('%s Answers' % len(A))
    print('%s Paraphrase Sets' % len(P))
def __init__(self):
    """Load WebQuestions data, Freebase maps, pickled weight indices, and weights.

    Sets up data-file paths, opens the Freebase sqlite database, runs the
    loader methods defined on this class, unpickles the word and answer
    weight-index maps into ``self.W`` / ``self.WA``, and finally asks
    ``qae`` to load the trained weights.
    """
    self.webquestions_test_file = '../Data/wq.txt'
    #self.webquestions_test_file = '../Preprocess/webquestions.examples.train.json'
    self.freebase_key_file = '../Data/mid-en-key.txt'
    self.freebase_name_file = '../Data/mid-en-name.txt'
    self.freebase_name_datafile = '../Data/mid-en-name.dat'
    self.word_weight_index_datafile = '../Data/words.dat'
    self.answer_weight_index_datafile = '../Data/word_answers.dat'
    # Keep the connection on self (not just the cursor) so it can be
    # committed/closed later; it was previously only a local variable.
    self.conn = sqlite3.connect('../Data/Freebase.db')
    self.c = self.conn.cursor()
    self.questions, self.answers, self.problems, self.subjects = [], [], [], []
    self.loading_webquestions()
    self.initialize_freebase_key_mid()
    self.initialize_freebase_names()
    # Restore the word/answer -> weight-index maps saved by a prior run.
    # NOTE(review): pickle.load assumes these .dat files are trusted input.
    with open(self.word_weight_index_datafile, 'rb') as f:
        self.W = pickle.load(f)
    with open(self.answer_weight_index_datafile, 'rb') as f:
        self.WA = pickle.load(f)
    info("Attempting to load weights.")
    qae.loadweights()
    info("Loaded weights.")