def do_work(corpus, features, svd_dims):
    """Link the observed-phrase vectors file to the composed-ngrams output path.

    Builds two paths under a fixed toolkit directory and passes them to
    ``force_symlink(vectors_file, output_file)``.

    :param corpus: experiment/corpus number, formatted with ``%d``; the value
        11 selects the name ``'wiki'``, anything else selects ``'gigaw'``
    :param features: feature-set number, formatted with ``%d``
    :param svd_dims: SVD dimensionality, formatted with ``%d``; when it is 0
        the output file name carries no ``-<dims>`` suffix
    """
    root = '/lustre/scratch/inf/mmb28/FeatureExtractionToolkit'

    if corpus == 11:
        corpus_label = 'wiki'
    else:
        corpus_label = 'gigaw'

    # svd_dims == 0 contributes nothing to the output file name
    if svd_dims == 0:
        dims_suffix = ''
    else:
        dims_suffix = '-%d' % svd_dims

    # where the output should be written
    target_dir = 'exp%d-%d-composed-ngrams-ppmi-svd' % (corpus, features)
    target_name = 'AN_NN_%s%s_Observed.events.filtered.strings' % (
        corpus_label, dims_suffix)
    output_file = os.path.join(root, target_dir, target_name)

    # contains SVD-reduced N, J and NP observed vectors, built by another script
    vectors_file = (
        '%s/exp%d-%db/exp%d-with-obs-phrases-SVD%d.events.filtered.strings'
        % (root, corpus, features, corpus, svd_dims)
    )

    # NOTE(review): presumably makes output_file a link to vectors_file --
    # confirm against force_symlink's argument order.
    force_symlink(vectors_file, output_file)
def run_socher_code():
    """Point Socher's input file at the phrase list, then run his script.

    Calls ``force_symlink(phrases_to_compose, socher_input_file)`` and then
    executes ``./phrase2Vector.sh`` via ``run_and_log_output`` while inside
    the ``temp_chdir(socher_base_dir)`` context.
    """
    script = './phrase2Vector.sh'

    # Symlink the file Socher's code expects to the list of phrases of interest.
    force_symlink(phrases_to_compose, socher_input_file)

    # The script path is relative, so it is invoked from within the
    # temp_chdir(socher_base_dir) context.
    with temp_chdir(socher_base_dir):
        # this takes a while
        run_and_log_output(script)