return word_vect_matrix source_language = 'spanish' target_language = 'english07' pair_filename = 'word_pairs/es-en.pair' reverse_pair = False target_vectors, target_vect_size = word_vect_loader.load('pos_data/'+target_language+'.train.sent.vec') source_vectors, source_vect_size = word_vect_loader.load('pos_data/'+source_language+'.train.sent.vec') assert source_vect_size == target_vect_size vect_size = source_vect_size print 'loading' source_text_sentences = load_and_save.read_sentences_from_file('pos_data/conll-'+source_language+'.pos') source_sentences, source_words, source_sentences_pos, _ = load_and_save.integer_sentences(source_text_sentences, pos=universal_pos_tags, max_words=10000) source_test_sentences = load_and_save.read_sentences_from_file('pos_data/conll-'+source_language+'-test.pos') source_test_sentences, _, source_test_sentences_pos, _ = load_and_save.integer_sentences(source_test_sentences, pos=universal_pos_tags, words=source_words) target_text_sentences = load_and_save.read_sentences_from_file('pos_data/conll-'+target_language+'-test.pos') target_sentences, target_words, target_sentences_pos, _ = load_and_save.integer_sentences(target_text_sentences, pos=universal_pos_tags, max_words=10000) source_vector_matrix = make_vector_matrix(source_words, source_vectors, vect_size) target_vector_matrix = make_vector_matrix(target_words, target_vectors, vect_size) print 'finding rotation' translation_pairs = [] pair_file = open(pair_filename) for line in pair_file: split = line.split() if split[0] in source_words and split[2] in target_words: