def train_wrapper(seed_fn, source_fn, target_fn, reverse=False, mx_path=None,
                  train_size=5000):
    """Learn a translation matrix from a seed dictionary and save it to disk.

    The seed dictionary is read with ``read_dict``; the source and target
    spaces are built restricted to the words that occur in it, normalized,
    and handed to ``train_tm``. Two files are written next to each other:

    * ``<mx_path>.npy``       -- the translation matrix (via ``np.save``)
    * ``<mx_path>.train_wds`` -- the pickled ``used_for_train`` object

    where ``<mx_path>`` is whatever ``default_output_fn`` returns for the
    given ``mx_path``, ``seed_fn``, ``source_fn`` and ``target_fn``.

    Args:
        seed_fn: Path of the seed dictionary, passed to ``read_dict``.
        source_fn: Path of the source space, passed to ``Space.build``.
        target_fn: Path of the target space, passed to ``Space.build``.
        reverse: Forwarded to ``read_dict`` (presumably swaps the translation
            direction -- confirm against ``read_dict``).
        mx_path: Output path (stem) forwarded to ``default_output_fn``.
        train_size: Forwarded to ``train_tm``.

    Returns:
        The ``(tm, used_for_train)`` pair returned by ``train_tm``.
    """
    logging.info("Training...")
    seed_trans = read_dict(seed_fn, reverse=reverse)

    # The semantic spaces contain additional words; only the vectors for the
    # words in the training data need to be loaded.
    source_words = set(seed_trans.iterkeys())
    # Each value must itself be an iterable of target words for union(*...).
    target_words = set().union(*seed_trans.itervalues())

    source_sp = Space.build(source_fn, lexicon=source_words)
    source_sp.normalize()

    target_sp = Space.build(target_fn, lexicon=target_words)
    target_sp.normalize()

    logging.info("Learning the translation matrix")
    tm, used_for_train = train_tm(source_sp, target_sp, seed_trans, train_size)

    mx_path = default_output_fn(mx_path, seed_fn, source_fn, target_fn)
    logging.info("Saving the translation matrix to {}".format(mx_path))
    np.save('{}.npy'.format(mx_path), tm)

    # Use a context manager so the file is flushed and closed deterministically
    # instead of whenever the anonymous file object gets garbage-collected.
    # The mode is kept as 'w' so the file stays readable by loaders that open
    # it in (default) text mode.
    with open('{}.train_wds'.format(mx_path), mode='w') as train_wds_file:
        pickle.dump(used_for_train, train_wds_file)

    return tm, used_for_train
def load_tr_mx(self):
    """Make sure a translation matrix and its training words are available.

    Exactly one source is accepted:

    * ``self.args.mx_path`` is set: ``self.tr_mx`` and
      ``self.exclude_from_test`` must not already be provided. The path is
      resolved through ``default_output_fn`` (and written back to
      ``self.args.mx_path``), then ``<mx_path>.train_wds`` is unpickled into
      ``self.exclude_from_test`` and ``<mx_path>.npy`` is loaded into
      ``self.tr_mx``.
    * ``self.args.mx_path`` is not set: both ``self.tr_mx`` and a non-empty
      ``self.exclude_from_test`` must already be present; nothing is loaded.

    Raises:
        Exception: If both a path and in-memory values are given, or if
            neither source provides both values.
    """
    if self.args.mx_path:
        # Compare against None rather than truth-testing: the truth value of
        # a multi-element NumPy array is ambiguous and raises ValueError,
        # which would mask the intended error below.
        if self.tr_mx is not None or self.exclude_from_test:
            raise Exception(
                "Translation mx or training words specified amibiguously.")

        self.args.mx_path = default_output_fn(
            self.args.mx_path, self.args.seed_fn, self.args.source_fn,
            self.args.target_fn)
        logging.info("Loading from {}".format(self.args.mx_path))

        # NOTE(security): pickle.load executes arbitrary code from the file;
        # only point mx_path at files produced by a trusted training run.
        # The context manager closes the handle deterministically.
        with open('{}.train_wds'.format(self.args.mx_path)) as train_wds_file:
            self.exclude_from_test = pickle.load(train_wds_file)
        self.tr_mx = np.load('{}.npy'.format(self.args.mx_path))
    elif self.tr_mx is None or not self.exclude_from_test:
        raise Exception('Translation matrix or training words unspecified')