def get_relevant_embeddings_filename(eval_data_filename, embeddings_filename):
    """Copy the embeddings needed for an evaluation set into a new plain file.

    Only a subset of word types is needed, so the lines of
    ``embeddings_filename`` whose first space-separated token is one of the
    word types returned by ``get_relevant_word_types(eval_data_filename)``
    are copied, unchanged, into a fresh uncompressed file.

    Args:
        eval_data_filename: path passed to ``get_relevant_word_types`` to
            determine which word types to keep.
        embeddings_filename: path passed to ``gzopen``; each line it yields
            is decoded as UTF-8, so it is assumed to yield byte strings
            (NOTE(review): confirm against ``gzopen``'s definition).

    Returns:
        Path of the newly written file, located in a ``temp`` directory next
        to this module and named with a random six-digit number.
    """
    temp_dir = os.path.join(os.path.dirname(__file__), 'temp')
    # Create the scratch directory on first use instead of failing in open().
    if not os.path.isdir(temp_dir):
        os.makedirs(temp_dir)
    # NOTE: the name is a random six-digit number; collisions with an
    # existing file are not checked and would overwrite that file.
    relevant_embeddings_filename = os.path.join(
        temp_dir, str(random.randint(100000, 999999)))

    relevant_word_types = set(get_relevant_word_types(eval_data_filename))

    # The output is opened in binary mode because UTF-8 encoded bytes are
    # written to it; text mode rejects bytes on Python 3 and may translate
    # newlines on some platforms.
    with gzopen(embeddings_filename) as all_embeddings_file, \
            open(relevant_embeddings_filename, mode='wb') as relevant_embeddings_file:
        for raw_line in all_embeddings_file:
            text = raw_line.decode('utf8')
            # Only the first token (the word type) matters for filtering, so
            # stop splitting after the first space.
            if text.split(' ', 1)[0] not in relevant_word_types:
                continue
            relevant_embeddings_file.write(text.encode('utf8'))
    return relevant_embeddings_filename