def get_job_embs(
        title_id,
        emb_dim=300,
        model_path="/data/rali7/Tmp/solimanz/data/wikipedia/wiki.en.bin"):
    """Build a matrix of job-title embeddings from a fastText model.

    Args:
        title_id: Mapping from job-title string to an integer id. Each id
            is used directly as a row index into the returned matrix,
            which has ``len(title_id)`` rows.
        emb_dim: Number of columns of the returned matrix; expected to
            match the length of the vectors the model produces.
        model_path: Path handed to ``FastText`` to load the model.

    Returns:
        A ``float32`` array of shape ``(len(title_id), emb_dim)``. Row
        ``title_id[title]`` holds the model's sentence vector for
        ``title``; rows that no id points at stay zero.
    """
    print("Loading fastText model...")
    ft_model = FastText(model_path)
    print("Model successfull loaded :-D")

    n_titles = len(title_id)
    matrix = np.zeros((n_titles, emb_dim), dtype=np.float32)

    # Walk (title, id) pairs together instead of re-looking up each id.
    for title, row in title_id.items():
        matrix[row, :] = ft_model.get_sentence_vector(title)

    return matrix
train_targets) np.save(os.path.join(ds_path, "bow", "test_targets.npy"), test_targets) print("All done! :-D") if args.representation == 'title_emb' or args.representation == 'all': print("Loading pre trained fastText model...") model = FastText( "/data/rali7/Tmp/solimanz/data/wikipedia/wiki.en.bin") X_train = np.zeros((len(train), 300), dtype=np.float32) X_test = np.zeros((len(test), 300), dtype=np.float32) for i, job_hist in enumerate(train): vec = model.get_sentence_vector(job_hist) X_train[i, :] = vec for i, job_hist in enumerate(test): vec = model.get_sentence_vector(job_hist) X_test[i, :] = vec create_path(os.path.join(ds_path, 'emb')) print("Saving train and test embeddings...") np.save(os.path.join(ds_path, "emb", "train.npy"), X_train) np.save(os.path.join(ds_path, "emb", "test.npy"), X_test) np.save(os.path.join(ds_path, "emb", "train_targets.npy"), train_targets) np.save(os.path.join(ds_path, "emb", "test_targets.npy"), test_targets)