def main():
    '''Run every parameter combination in a worker pool, then post-process.

    Sets the IRSTLM environment variable, calls setup_tune(), builds one
    argument list per combination of the module-level option lists (with a
    running index appended as the last element), maps run_star over them in
    a multiprocessing pool, closes the shared log file and finally runs the
    datamine and mail commands.
    '''
    global log_file
    os.environ['IRSTLM'] = irstlm_path

    # Only setup_tune() is active in this revision; the setup_train() and
    # setup_test() calls are commented out in the original.
    setup_tune()

    combinations = itertools.product(
        max_phrase_length, order, reordering_language,
        reordering_directionality, score_options, smoothing, alignment,
        reordering_orientation, reordering_modeltype)
    # Each entry is the option values followed by its position in the grid.
    config = [list(options) + [index]
              for index, options in enumerate(combinations)]

    workers = multiprocessing.Pool(processes=pool_size)
    workers.map(run_star, config)
    workers.close()
    workers.join()

    log_file.close()
    command('python datamine.py')
    mail_command = 'cat {0}/out.csv | mail -s "Output" {1}'.format(
        archive_path, email)
    command(mail_command)
def pcommand(c):
    '''Run a command via command() and record it with the module logger.

    The command string is logged first, then the ``out`` and ``err``
    attributes of the result, all at INFO level.

    :Parameters:
       - 'c': command to run and log

    :Returns:
       - the object returned by command(c)
    '''
    logger.info(c)
    result = command(c)
    for stream_name in ("out", "err"):
        logger.info(getattr(result, stream_name))
    return result
def main():
    '''Run all experiments described by a YAML configuration file.

    Expects exactly one command-line argument, the configuration path
    (sys.argv[1]). With any other argument count a usage line is printed
    and the function returns without doing any work.

    Otherwise it builds the configuration object, copies the configuration
    file into y.paths.project, sets the IRSTLM environment variable, calls
    setup_train/setup_tune/setup_test, maps run_star over the argument lists
    from get_run_args() in a multiprocessing pool, calls
    datamine.write_data() and finally hands a ``cat ... | mail`` command
    line to command().

    :Returns:
       - None
    '''
    if len(sys.argv) != 2:
        # Show only the script name: echoing every argument back (as the
        # old message did) produced a misleading usage line whenever too
        # many arguments were given. A single pre-formatted string also
        # prints identically under Python 2 and Python 3.
        script = sys.argv[0] if sys.argv else ''
        print("Usage: python {0} config_train.yaml".format(script))
        return

    y = structures.BuildConfiguration(sys.argv[1])
    config = get_run_args(y)
    # Keep a copy of the configuration next to the results it produced.
    shutil.copy(sys.argv[1], y.paths.project)
    os.environ['IRSTLM'] = y.paths.irstlm

    setup_train(y)
    setup_tune(y)
    setup_test(y)

    pool = multiprocessing.Pool(processes=y.settings.pool_size)
    pool.map(run_star, config)
    pool.close()
    pool.join()

    datamine.write_data(y.paths.project)
    command('cat {0}/data.csv | mail -s "Output" {1}'.format(
        y.paths.project, y.settings.email))
def pcommand(c):
    '''Execute a command with command() and log what happened.

    Logs, in order and at INFO level: the command itself, the result's
    ``out`` attribute, and the result's ``err`` attribute.

    :Parameters:
       - 'c': command to run and log

    :Returns:
       - the object returned by command(c)
    '''
    logger.info(c)
    outcome = command(c)
    logger.info(outcome.out)
    logger.info(outcome.err)
    return outcome
def pcommand(c):
    '''Run a command with command() and log it, skipping empty output.

    The command string is always logged at INFO level. The result's
    ``out`` and ``err`` attributes are logged (also at INFO level) only
    when their length is non-zero.

    :Parameters:
       - 'c': command to run and log

    :Returns:
       - a command result object (whatever command(c) returned)
    '''
    logger.info(c)
    result = command(c)
    for stream_name in ("out", "err"):
        text = getattr(result, stream_name)
        if len(text) == 0:
            continue
        logger.info(text)
    return result
def main():
    '''Drive the full parameter sweep and report the results.

    Steps: export IRSTLM, call setup_tune(), expand the module-level option
    lists into one argument list per combination (the combination's index
    is appended as the final element), run run_star on each list in a
    multiprocessing pool, close the shared log file, then run the datamine
    script and mail out.csv through command().
    '''
    global log_file
    os.environ['IRSTLM'] = irstlm_path

    # setup_train() and setup_test() are commented out in the original;
    # only the tuning setup runs.
    setup_tune()

    option_axes = (
        max_phrase_length, order, reordering_language,
        reordering_directionality, score_options, smoothing, alignment,
        reordering_orientation, reordering_modeltype)
    config = []
    for run_index, combination in enumerate(itertools.product(*option_axes)):
        arguments = list(combination)
        arguments.append(run_index)
        config.append(arguments)

    pool = multiprocessing.Pool(processes=pool_size)
    pool.map(run_star, config)
    pool.close()
    pool.join()

    log_file.close()
    command('python datamine.py')
    command('cat {0}/out.csv | mail -s "Output" {1}'.format(
        archive_path, email))
def run_config(l_len, l_order, l_lang, l_direct, l_score, l_smoothing,
               l_align, l_orient, l_model, i):
    '''Build the language models, then train, tune and test one configuration.

    Everything for the run lives under ``{archive_path}/{i}``: the language
    models in ``lm``, the Moses working files in ``working``, a summary log
    ``{i}.ilog.txt`` (parameters, timings, LM query and BLEU output) and a
    command log ``{i}.clog.txt`` (passed to pcommand for every shell step).

    :Parameters:
       - 'l_len': logged as MaxPhraseLength
       - 'l_order': n-gram order (build-lm ``-n`` and the ``-lm`` specs)
       - 'l_lang': last component of the ``-reordering`` option
       - 'l_direct': third component of the ``-reordering`` option
       - 'l_score': value passed to ``--score-options``
       - 'l_smoothing': value passed to build-lm ``-s``
       - 'l_align': value passed to ``-alignment``
       - 'l_orient': second component of the ``-reordering`` option
       - 'l_model': first component of the ``-reordering`` option
       - 'i': run index; converted to str and used as the directory name

    :Returns:
       - None
    '''
    i = str(i)
    run_start = time.time()
    lm_path = "{0}/{1}/lm".format(archive_path, i)
    working_path = "{0}/{1}/working".format(archive_path, i)

    def timestamp():
        # Wall-clock time in the format used by every *_Start_Time entry.
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

    c = command("mkdir {0}/{1}".format(archive_path, i))
    print(c.out)
    print(c.err)

    # Line-buffered logs; the with-block closes both even if a step raises.
    with open("{0}/{1}/{1}.ilog.txt".format(archive_path, i), "w", 1) as i_log, \
            open("{0}/{1}/{1}.clog.txt".format(archive_path, i), "w", 1) as c_log:

        def log_elapsed(label, start):
            # Write "<label> = seconds" and "<label>_HMS = h:mm:ss" from a
            # single clock reading so the two entries agree.
            elapsed = time.time() - start
            log(i_log, "{0} = {1}".format(label, str(elapsed)))
            log(i_log, "{0}_HMS = {1}".format(
                label, str(datetime.timedelta(seconds=elapsed))))

        log(i_log, "i = {0}".format(i))
        log(i_log, "Start_Time = {0}".format(timestamp()))
        log(i_log, "Order = {0}".format(l_order))
        log(i_log, "Smoothing = {0}".format(l_smoothing))
        log(i_log, "ScoreOptions = {0}".format(l_score))
        log(i_log, "Alignment = {0}".format(l_align))
        log(i_log, "ReorderingModeltype = {0}".format(l_model))
        log(i_log, "ReorderingOrientation = {0}".format(l_orient))
        log(i_log, "ReorderingDirectionality = {0}".format(l_direct))
        log(i_log, "ReorderingLanguage = {0}".format(l_lang))
        # NOTE(review): l_len is only written to the log; it is not passed
        # to train-model.perl below. Confirm whether that is intended.
        log(i_log, "MaxPhraseLength = {0}".format(l_len))
        log(i_log, "")

        # Create language model (surface forms first, then POS tags).
        lm_start = time.time()
        pcommand("mkdir {0}".format(lm_path), c_log)
        pcommand(
            "{0}/bin/add-start-end.sh < {1}/{2}.true.{3} > {4}/{2}.sb.{3}"
            .format(irstlm_path, corpora_path, train_name, foreign, lm_path),
            c_log)
        pcommand(
            ("{0}/bin/build-lm.sh -i {5}/{1}.sb.{4} -t {5}/tmp -p -n {2} "
             "-s {3} -o {5}/{1}.ilm.{4}.gz").format(
                 irstlm_path, train_name, l_order, l_smoothing, foreign,
                 lm_path),
            c_log)
        pcommand(
            "{0}/bin/compile-lm --text {3}/{1}.ilm.{2}.gz {3}/{1}.arpa.{2}"
            .format(irstlm_path, train_name, foreign, lm_path),
            c_log)
        # Read the ARPA file compile-lm just wrote (.arpa.<foreign>); the
        # extension used to be hard-coded to "es".
        pcommand(
            "{0}/bin/build_binary -i {3}/{1}.arpa.{2} {3}/{1}.blm.{2}"
            .format(moses_path, train_name, foreign, lm_path),
            c_log)
        pcommand(
            "{0}/bin/add-start-end.sh < {1}/{2}.pos.{3} > {4}/{2}.pos.sb.{3}"
            .format(irstlm_path, corpora_path, train_name, foreign, lm_path),
            c_log)
        pcommand(
            ("{0}/bin/build-lm.sh -i {5}/{1}.pos.sb.{4} -t {5}/tmp -p -n {2} "
             "-s {3} -o {5}/{1}.pos.ilm.{4}.gz").format(
                 irstlm_path, train_name, l_order, l_smoothing, foreign,
                 lm_path),
            c_log)
        pcommand(
            ("{0}/bin/compile-lm --text {3}/{1}.pos.ilm.{2}.gz "
             "{3}/{1}.pos.arpa.{2}").format(
                 irstlm_path, train_name, foreign, lm_path),
            c_log)
        pcommand(
            "{0}/bin/build_binary -i {3}/{1}.pos.arpa.{2} {3}/{1}.pos.blm.{2}"
            .format(moses_path, train_name, foreign, lm_path),
            c_log)
        # Query the surface-form model once and keep the output in the log.
        o = pcommand(
            "echo 'Is this a Spanish sentance?' | {0}/bin/query {1}/{2}.blm.{3}"
            .format(moses_path, lm_path, train_name, foreign),
            c_log)
        log(i_log, "")
        log(i_log, o.out)
        log(i_log, o.err)
        log_elapsed("LM_Time", lm_start)

        # Train the model
        train_start = time.time()
        log(i_log, "Train_Start_Time = {0}".format(timestamp()))
        pcommand("mkdir {0}".format(working_path), c_log)
        # NOTE(review): with "2>&1 > file" the shell points stderr at the
        # original stdout and only stdout at the file; confirm this order
        # is intended (same pattern in the tuning command).
        pcommand(
            ("{0}/scripts/training/train-model.perl -root-dir {15}/train "
             "-corpus {1}/{2}.factored.clean -f en -e {3} "
             "--score-options '{4}' -alignment {5} "
             "-reordering {6}-{7}-{8}-{9} "
             "-lm 0:{10}:{11}/{12}.blm.{13}:1 "
             "-lm 1:{10}:{11}/{12}.pos.blm.{13}:1 "
             "--translation-factors 0-0,1 -mgiza -mgiza-cpus {14} "
             "-external-bin-dir {0}/tools -cores {14} --parallel --parts 3 "
             "2>&1 > {15}/training.out").format(
                 moses_path, corpora_path, train_name, foreign, l_score,
                 l_align, l_model, l_orient, l_direct, l_lang, l_order,
                 lm_path, train_name, foreign, threads, working_path),
            c_log)
        # Measured from train_start; this used to be measured from lm_start
        # and therefore included the language-model build time.
        log_elapsed("Train_Time", train_start)
        print("trained")

        # Tune the model
        tune_start = time.time()
        log(i_log, "Tune_Start_Time = {0}".format(timestamp()))
        pcommand(
            ("{0}/scripts/training/mert-moses.pl {1}/{2}.true.en "
             "{1}/{2}.true.{3} {0}/bin/moses {4}/train/model/moses.ini "
             "--working-dir {4}/mert-work --mertdir {0}/bin/ "
             "2>&1 > {4}/mert.out").format(
                 moses_path, corpora_path, tune_name, foreign, working_path),
            c_log)
        log_elapsed("Tune_Time", tune_start)
        print("tuned")

        # Test the model
        test_start = time.time()
        log(i_log, "Test_Start_Time = {0}".format(timestamp()))
        pcommand(
            ("{0}/scripts/training/filter-model-given-input.pl "
             "{3}/filtered-{1} {3}/mert-work/moses.ini {2}/{1}.true.en "
             "-Binarizer {0}/bin/processPhraseTable").format(
                 moses_path, test_name, corpora_path, working_path),
            c_log)
        pcommand(
            ("{0}/bin/moses -f {1}/filtered-{3}/moses.ini "
             "< {2}/{3}.true.en > {1}/{3}.translated.{4} "
             "2> {1}/{3}.out").format(
                 moses_path, working_path, corpora_path, test_name, foreign),
            c_log)
        c = pcommand(
            ("{0}/scripts/generic/multi-bleu.perl -lc {1}/{2}.true.{4} "
             "< {3}/{2}.translated.{4}").format(
                 moses_path, corpora_path, test_name, working_path, foreign),
            c_log)
        log(i_log, c.out)
        print("tested")
        log_elapsed("Test_Time", test_start)

        log(i_log, "Run_Time_HMS = {0}".format(
            str(datetime.timedelta(seconds=(time.time() - run_start)))))
        log(i_log, "End_Time = {0}".format(timestamp()))
        log(i_log, "Done = {0}".format(i))
def pcommand(c, log):
    '''Run a command with command() and append a record of it to a log.

    Writes three newline-terminated entries to ``log``: the command string,
    then the result's ``out`` attribute, then its ``err`` attribute.

    :Parameters:
       - 'c': command to run and log
       - 'log': object with a write() method that receives the entries

    :Returns:
       - the object returned by command(c)
    '''
    def write_line(text):
        log.write(text + "\n")

    write_line(c)
    result = command(c)
    write_line(result.out)
    write_line(result.err)
    return result
def pcommand(c, log):
    '''Execute a command via command() and record it in the given log.

    The command string is written before it runs; afterwards the result's
    ``out`` and ``err`` attributes are written, each followed by a newline.

    :Parameters:
       - 'c': command to run and log
       - 'log': object with a write() method that receives the entries

    :Returns:
       - the object returned by command(c)
    '''
    newline = "\n"
    log.write(c + newline)
    outcome = command(c)
    for stream_name in ("out", "err"):
        log.write(getattr(outcome, stream_name) + newline)
    return outcome
def run_config(l_len, l_order, l_lang, l_direct, l_score, l_smoothing,
               l_align, l_orient, l_model, i):
    '''Build the language models, then train, tune and test one configuration.

    Everything for the run lives under ``{archive_path}/{i}``: the language
    models in ``lm``, the Moses working files in ``working``, a summary log
    ``{i}.ilog.txt`` (parameters, timings, LM query and BLEU output) and a
    command log ``{i}.clog.txt`` (passed to pcommand for every shell step).

    :Parameters:
       - 'l_len': logged as MaxPhraseLength
       - 'l_order': n-gram order (build-lm ``-n`` and the ``-lm`` specs)
       - 'l_lang': last component of the ``-reordering`` option
       - 'l_direct': third component of the ``-reordering`` option
       - 'l_score': value passed to ``--score-options``
       - 'l_smoothing': value passed to build-lm ``-s``
       - 'l_align': value passed to ``-alignment``
       - 'l_orient': second component of the ``-reordering`` option
       - 'l_model': first component of the ``-reordering`` option
       - 'i': run index; converted to str and used as the directory name

    :Returns:
       - None
    '''
    i = str(i)
    run_start = time.time()
    lm_path = "{0}/{1}/lm".format(archive_path, i)
    working_path = "{0}/{1}/working".format(archive_path, i)

    def timestamp():
        # Wall-clock time in the format used by every *_Start_Time entry.
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

    c = command("mkdir {0}/{1}".format(archive_path, i))
    print(c.out)
    print(c.err)

    # Line-buffered logs; the with-block closes both even if a step raises.
    with open("{0}/{1}/{1}.ilog.txt".format(archive_path, i), "w", 1) as i_log, \
            open("{0}/{1}/{1}.clog.txt".format(archive_path, i), "w", 1) as c_log:

        def log_elapsed(label, start):
            # Write "<label> = seconds" and "<label>_HMS = h:mm:ss" from a
            # single clock reading so the two entries agree.
            elapsed = time.time() - start
            log(i_log, "{0} = {1}".format(label, str(elapsed)))
            log(i_log, "{0}_HMS = {1}".format(
                label, str(datetime.timedelta(seconds=elapsed))))

        log(i_log, "i = {0}".format(i))
        log(i_log, "Start_Time = {0}".format(timestamp()))
        log(i_log, "Order = {0}".format(l_order))
        log(i_log, "Smoothing = {0}".format(l_smoothing))
        log(i_log, "ScoreOptions = {0}".format(l_score))
        log(i_log, "Alignment = {0}".format(l_align))
        log(i_log, "ReorderingModeltype = {0}".format(l_model))
        log(i_log, "ReorderingOrientation = {0}".format(l_orient))
        log(i_log, "ReorderingDirectionality = {0}".format(l_direct))
        log(i_log, "ReorderingLanguage = {0}".format(l_lang))
        # NOTE(review): l_len is only written to the log; it is not passed
        # to train-model.perl below. Confirm whether that is intended.
        log(i_log, "MaxPhraseLength = {0}".format(l_len))
        log(i_log, "")

        # Create language model (surface forms first, then POS tags).
        lm_start = time.time()
        pcommand("mkdir {0}".format(lm_path), c_log)
        pcommand(
            "{0}/bin/add-start-end.sh < {1}/{2}.true.{3} > {4}/{2}.sb.{3}"
            .format(irstlm_path, corpora_path, train_name, foreign, lm_path),
            c_log)
        pcommand(
            ("{0}/bin/build-lm.sh -i {5}/{1}.sb.{4} -t {5}/tmp -p -n {2} "
             "-s {3} -o {5}/{1}.ilm.{4}.gz").format(
                 irstlm_path, train_name, l_order, l_smoothing, foreign,
                 lm_path),
            c_log)
        pcommand(
            "{0}/bin/compile-lm --text {3}/{1}.ilm.{2}.gz {3}/{1}.arpa.{2}"
            .format(irstlm_path, train_name, foreign, lm_path),
            c_log)
        # Read the ARPA file compile-lm just wrote (.arpa.<foreign>); the
        # extension used to be hard-coded to "es".
        pcommand(
            "{0}/bin/build_binary -i {3}/{1}.arpa.{2} {3}/{1}.blm.{2}"
            .format(moses_path, train_name, foreign, lm_path),
            c_log)
        pcommand(
            "{0}/bin/add-start-end.sh < {1}/{2}.pos.{3} > {4}/{2}.pos.sb.{3}"
            .format(irstlm_path, corpora_path, train_name, foreign, lm_path),
            c_log)
        pcommand(
            ("{0}/bin/build-lm.sh -i {5}/{1}.pos.sb.{4} -t {5}/tmp -p -n {2} "
             "-s {3} -o {5}/{1}.pos.ilm.{4}.gz").format(
                 irstlm_path, train_name, l_order, l_smoothing, foreign,
                 lm_path),
            c_log)
        pcommand(
            ("{0}/bin/compile-lm --text {3}/{1}.pos.ilm.{2}.gz "
             "{3}/{1}.pos.arpa.{2}").format(
                 irstlm_path, train_name, foreign, lm_path),
            c_log)
        pcommand(
            "{0}/bin/build_binary -i {3}/{1}.pos.arpa.{2} {3}/{1}.pos.blm.{2}"
            .format(moses_path, train_name, foreign, lm_path),
            c_log)
        # Query the surface-form model once and keep the output in the log.
        o = pcommand(
            "echo 'Is this a Spanish sentance?' | {0}/bin/query {1}/{2}.blm.{3}"
            .format(moses_path, lm_path, train_name, foreign),
            c_log)
        log(i_log, "")
        log(i_log, o.out)
        log(i_log, o.err)
        log_elapsed("LM_Time", lm_start)

        # Train the model
        train_start = time.time()
        log(i_log, "Train_Start_Time = {0}".format(timestamp()))
        pcommand("mkdir {0}".format(working_path), c_log)
        # NOTE(review): with "2>&1 > file" the shell points stderr at the
        # original stdout and only stdout at the file; confirm this order
        # is intended (same pattern in the tuning command).
        pcommand(
            ("{0}/scripts/training/train-model.perl -root-dir {15}/train "
             "-corpus {1}/{2}.factored.clean -f en -e {3} "
             "--score-options '{4}' -alignment {5} "
             "-reordering {6}-{7}-{8}-{9} "
             "-lm 0:{10}:{11}/{12}.blm.{13}:1 "
             "-lm 1:{10}:{11}/{12}.pos.blm.{13}:1 "
             "--translation-factors 0-0,1 -mgiza -mgiza-cpus {14} "
             "-external-bin-dir {0}/tools -cores {14} --parallel --parts 3 "
             "2>&1 > {15}/training.out").format(
                 moses_path, corpora_path, train_name, foreign, l_score,
                 l_align, l_model, l_orient, l_direct, l_lang, l_order,
                 lm_path, train_name, foreign, threads, working_path),
            c_log)
        # Measured from train_start; this used to be measured from lm_start
        # and therefore included the language-model build time.
        log_elapsed("Train_Time", train_start)
        print("trained")

        # Tune the model
        tune_start = time.time()
        log(i_log, "Tune_Start_Time = {0}".format(timestamp()))
        pcommand(
            ("{0}/scripts/training/mert-moses.pl {1}/{2}.true.en "
             "{1}/{2}.true.{3} {0}/bin/moses {4}/train/model/moses.ini "
             "--working-dir {4}/mert-work --mertdir {0}/bin/ "
             "2>&1 > {4}/mert.out").format(
                 moses_path, corpora_path, tune_name, foreign, working_path),
            c_log)
        log_elapsed("Tune_Time", tune_start)
        print("tuned")

        # Test the model
        test_start = time.time()
        log(i_log, "Test_Start_Time = {0}".format(timestamp()))
        pcommand(
            ("{0}/scripts/training/filter-model-given-input.pl "
             "{3}/filtered-{1} {3}/mert-work/moses.ini {2}/{1}.true.en "
             "-Binarizer {0}/bin/processPhraseTable").format(
                 moses_path, test_name, corpora_path, working_path),
            c_log)
        pcommand(
            ("{0}/bin/moses -f {1}/filtered-{3}/moses.ini "
             "< {2}/{3}.true.en > {1}/{3}.translated.{4} "
             "2> {1}/{3}.out").format(
                 moses_path, working_path, corpora_path, test_name, foreign),
            c_log)
        c = pcommand(
            ("{0}/scripts/generic/multi-bleu.perl -lc {1}/{2}.true.{4} "
             "< {3}/{2}.translated.{4}").format(
                 moses_path, corpora_path, test_name, working_path, foreign),
            c_log)
        log(i_log, c.out)
        print("tested")
        log_elapsed("Test_Time", test_start)

        log(i_log, "Run_Time_HMS = {0}".format(
            str(datetime.timedelta(seconds=(time.time() - run_start)))))
        log(i_log, "End_Time = {0}".format(timestamp()))
        log(i_log, "Done = {0}".format(i))