Ejemplo n.º 1
0
def main():
    '''Run every parameter combination through the pool and mail the results.

    Builds the cartesian product of the module-level parameter lists, appends
    a running index to each combination, maps run_star over them in a
    process pool, then runs datamine.py and mails out.csv from archive_path.

    :Returns:
        - None
    '''
    os.environ['IRSTLM'] = irstlm_path
    # Only the tuning setup is enabled; the other two steps are switched off.
    #setup_train()
    setup_tune()
    #setup_test()

    grid = itertools.product(max_phrase_length, order, reordering_language,
                             reordering_directionality, score_options,
                             smoothing, alignment, reordering_orientation,
                             reordering_modeltype)
    # Each entry is one parameter combination followed by its index.
    config = [list(combo) + [index] for index, combo in enumerate(grid)]

    pool = multiprocessing.Pool(processes=pool_size)
    try:
        pool.map(run_star, config)
    finally:
        # Release the workers and the log file even when a run raises.
        pool.close()
        pool.join()
        log_file.close()

    command('python datamine.py')
    command('cat {0}/out.csv | mail -s "Output" {1}'.format(
        archive_path, email))
Ejemplo n.º 2
0
def pcommand(c):
    '''Run a command through ``command`` and log the call and its output.
    :Parameters:
        - 'c': command to run and log
    :Returns:
        - the object returned by ``command``; its ``out`` and ``err``
          attributes are the values that get logged
    '''
    logger.info(c)
    result = command(c)
    # Log ``out`` first, then ``err``, whatever they contain.
    for stream_name in ('out', 'err'):
        logger.info(getattr(result, stream_name))
    return result
Ejemplo n.º 3
0
def main():
    '''Run all configurations described by a YAML file and mail the results.

    Expects exactly one command-line argument: the path of the configuration
    file. The file is copied into the project directory, the train/tune/test
    setup steps are run, every run configuration is mapped over run_star in
    a process pool, and data.csv from the project directory is mailed.

    :Returns:
        - None (also when only the usage message was printed)
    '''
    if len(sys.argv) != 2:
        # Show the script name only, not whatever arguments were passed.
        script = sys.argv[0] if sys.argv else "<script>"
        print("Usage: python {0} config_train.yaml".format(script))
        return

    y = structures.BuildConfiguration(sys.argv[1])

    config = get_run_args(y)
    shutil.copy(sys.argv[1], y.paths.project)
    os.environ['IRSTLM'] = y.paths.irstlm

    setup_train(y)
    setup_tune(y)
    setup_test(y)

    pool = multiprocessing.Pool(processes=y.settings.pool_size)
    try:
        pool.map(run_star, config)
    finally:
        # Release the worker processes even when a run raises.
        pool.close()
        pool.join()
    datamine.write_data(y.paths.project)
    command('cat {0}/data.csv | mail -s "Output" {1}'.format(y.paths.project, y.settings.email))
Ejemplo n.º 4
0
def pcommand(c):
    '''Log a command, run it through ``command``, then log what it produced.
    :Parameters:
        - 'c': command to run and log
    :Returns:
        - the object returned by ``command`` (the one whose ``out`` and
          ``err`` attributes were logged)
    '''
    logger.info(c)
    outcome = command(c)
    # ``out`` is logged before ``err``.
    for field in ('out', 'err'):
        logger.info(getattr(outcome, field))
    return outcome
Ejemplo n.º 5
0
def pcommand(c):
    '''Run a command through ``command`` and log it with its output.
    :Parameters:
        - 'c': command to run and log
    :Returns:
        - the object returned by ``command``
    '''
    logger.info(c)
    o = command(c)
    # Skip empty (or missing) output so the log gets no blank entries.
    if o.out:
        logger.info(o.out)
    if o.err:
        logger.info(o.err)
    return o
Ejemplo n.º 6
0
def main():
    '''Run every parameter combination through the pool and mail the results.

    Builds the cartesian product of the module-level parameter lists, appends
    a running index to each combination, maps run_star over them in a
    process pool, then runs datamine.py and mails out.csv from archive_path.

    :Returns:
        - None
    '''
    os.environ['IRSTLM'] = irstlm_path
    # Only the tuning setup is enabled; the other two steps are switched off.
    #setup_train()
    setup_tune()
    #setup_test()

    grid = itertools.product(max_phrase_length, order, reordering_language, reordering_directionality, score_options, smoothing, alignment, reordering_orientation, reordering_modeltype)
    # Each entry is one parameter combination followed by its index.
    config = [list(combo) + [index] for index, combo in enumerate(grid)]

    pool = multiprocessing.Pool(processes=pool_size)
    try:
        pool.map(run_star, config)
    finally:
        # Release the workers and the log file even when a run raises.
        pool.close()
        pool.join()
        log_file.close()

    command('python datamine.py')
    command('cat {0}/out.csv | mail -s "Output" {1}'.format(archive_path, email))
Ejemplo n.º 7
0
def run_config(l_len, l_order, l_lang, l_direct, l_score, l_smoothing, l_align, l_orient, l_model, i):
    '''Build the language models, then train, tune and test one configuration.

    Everything is written under "<archive_path>/<i>/": the language models in
    "lm", the Moses working files in "working", a summary log
    "<i>.ilog.txt" and "<i>.clog.txt", the log handed to pcommand for every
    shell command.

    :Parameters:
        - 'l_len': value logged as MaxPhraseLength
        - 'l_order': n-gram order given to build-lm.sh and the -lm options
        - 'l_lang': language part of the -reordering option
        - 'l_direct': directionality part of the -reordering option
        - 'l_score': value given to --score-options
        - 'l_smoothing': smoothing given to build-lm.sh (-s)
        - 'l_align': value given to -alignment
        - 'l_orient': orientation part of the -reordering option
        - 'l_model': model type part of the -reordering option
        - 'i': index of this configuration; names the output directory
    :Returns:
        - None
    '''
    i = str(i)
    run_start = time.time()
    lm_path = "{0}/{1}/lm".format(archive_path, i)
    working_path = "{0}/{1}/working".format(archive_path, i)

    c = command("mkdir {0}/{1}".format(archive_path, i))
    print(c.out)
    print(c.err)

    i_log = open("{0}/{1}/{1}.ilog.txt".format(archive_path, i), "w", 1)
    c_log = None
    try:
        c_log = open("{0}/{1}/{1}.clog.txt".format(archive_path, i), "w", 1)

        log(i_log, "i = {0}".format(i))
        log(i_log, "Start_Time = {0}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
        log(i_log, "Order = {0}".format(l_order))
        log(i_log, "Smoothing = {0}".format(l_smoothing))
        log(i_log, "ScoreOptions = {0}".format(l_score))
        log(i_log, "Alignment = {0}".format(l_align))
        log(i_log, "ReorderingModeltype = {0}".format(l_model))
        log(i_log, "ReorderingOrientation = {0}".format(l_orient))
        log(i_log, "ReorderingDirectionality = {0}".format(l_direct))
        log(i_log, "ReorderingLanguage = {0}".format(l_lang))
        log(i_log, "MaxPhraseLength = {0}".format(l_len))
        log(i_log, "")

        #Create language model
        lm_start = time.time()
        pcommand("mkdir {0}".format(lm_path), c_log)
        pcommand("{0}/bin/add-start-end.sh < {1}/{2}.true.{3} > {4}/{2}.sb.{3}".format(irstlm_path, corpora_path, train_name, foreign, lm_path), c_log)
        pcommand("{0}/bin/build-lm.sh -i {5}/{1}.sb.{4} -t {5}/tmp -p -n {2} -s {3} -o {5}/{1}.ilm.{4}.gz".format(irstlm_path, train_name, l_order, l_smoothing, foreign, lm_path), c_log)
        pcommand("{0}/bin/compile-lm --text  {3}/{1}.ilm.{2}.gz {3}/{1}.arpa.{2}".format(irstlm_path, train_name, foreign, lm_path), c_log)
        # Read the ARPA file compile-lm just wrote: same language suffix,
        # not a fixed "es".
        pcommand("{0}/bin/build_binary -i {3}/{1}.arpa.{2} {3}/{1}.blm.{2}".format(moses_path, train_name, foreign, lm_path), c_log)

        # Same pipeline again for the ".pos" corpus.
        pcommand("{0}/bin/add-start-end.sh < {1}/{2}.pos.{3} > {4}/{2}.pos.sb.{3}".format(irstlm_path, corpora_path, train_name, foreign, lm_path), c_log)
        pcommand("{0}/bin/build-lm.sh -i {5}/{1}.pos.sb.{4} -t {5}/tmp -p -n {2} -s {3} -o {5}/{1}.pos.ilm.{4}.gz".format(irstlm_path, train_name, l_order, l_smoothing, foreign, lm_path), c_log)
        pcommand("{0}/bin/compile-lm --text  {3}/{1}.pos.ilm.{2}.gz {3}/{1}.pos.arpa.{2}".format(irstlm_path, train_name, foreign, lm_path), c_log)
        pcommand("{0}/bin/build_binary -i {3}/{1}.pos.arpa.{2} {3}/{1}.pos.blm.{2}".format(moses_path, train_name, foreign, lm_path), c_log)

        # Query the freshly built model once and keep the answer in the summary.
        o = pcommand("echo 'Is this a Spanish sentance?' | {0}/bin/query {1}/{2}.blm.{3}".format(moses_path, lm_path, train_name, foreign), c_log)
        log(i_log, "")
        log(i_log, o.out)
        log(i_log, o.err)
        log(i_log, "LM_Time = {0}".format(str(time.time()-lm_start)))
        log(i_log, "LM_Time_HMS = {0}".format(str(datetime.timedelta(seconds=(time.time()-lm_start)))))

        #Train the model
        train_start = time.time()

        log(i_log, "Train_Start_Time = {0}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))

        pcommand("mkdir {0}".format(working_path), c_log)
        # NOTE(review): "2>&1 > file" sends stderr to the inherited stdout and
        # only stdout to the file; if both were meant to land in the file the
        # order would be "> file 2>&1". Left unchanged (same for mert below).
        pcommand("{0}/scripts/training/train-model.perl -root-dir {15}/train -corpus {1}/{2}.factored.clean -f en -e {3} --score-options \'{4}\' -alignment {5} -reordering {6}-{7}-{8}-{9} -lm 0:{10}:{11}/{12}.blm.{13}:1 -lm 1:{10}:{11}/{12}.pos.blm.{13}:1 --translation-factors 0-0,1 -mgiza -mgiza-cpus {14} -external-bin-dir {0}/tools -cores {14} --parallel --parts 3 2>&1 > {15}/training.out".format(moses_path, corpora_path, train_name, foreign, l_score, l_align, l_model, l_orient, l_direct, l_lang, l_order, lm_path, train_name, foreign, threads, working_path), c_log)
        # Measure training from its own start, not from the LM start.
        log(i_log, "Train_Time = {0}".format(str(time.time()-train_start)))
        log(i_log, "Train_Time_HMS = {0}".format(str(datetime.timedelta(seconds=(time.time()-train_start)))))
        print("trained")

        #Tune the model
        tune_start = time.time()
        log(i_log, "Tune_Start_Time = {0}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
        pcommand("{0}/scripts/training/mert-moses.pl {1}/{2}.true.en {1}/{2}.true.{3} {0}/bin/moses  {4}/train/model/moses.ini --working-dir {4}/mert-work --mertdir {0}/bin/ 2>&1 > {4}/mert.out".format(moses_path, corpora_path, tune_name, foreign, working_path), c_log)
        log(i_log, "Tune_Time = {0}".format(str(time.time()-tune_start)))
        log(i_log, "Tune_Time_HMS = {0}".format(str(datetime.timedelta(seconds=(time.time()-tune_start)))))
        print("tuned")

        #Test the model
        test_start = time.time()
        log(i_log, "Test_Start_Time = {0}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
        pcommand("{0}/scripts/training/filter-model-given-input.pl {3}/filtered-{1} {3}/mert-work/moses.ini {2}/{1}.true.en -Binarizer {0}/bin/processPhraseTable".format(moses_path, test_name, corpora_path, working_path), c_log)
        pcommand("{0}/bin/moses -f {1}/filtered-{3}/moses.ini  < {2}/{3}.true.en > {1}/{3}.translated.{4} 2> {1}/{3}.out".format(moses_path, working_path, corpora_path, test_name, foreign), c_log)
        c = pcommand("{0}/scripts/generic/multi-bleu.perl -lc {1}/{2}.true.{4} < {3}/{2}.translated.{4}".format(moses_path, corpora_path, test_name, working_path, foreign), c_log)
        log(i_log, c.out)
        print("tested")
        log(i_log, "Test_Time = {0}".format(str(time.time()-test_start)))
        log(i_log, "Test_Time_HMS = {0}".format(str(datetime.timedelta(seconds=(time.time()-test_start)))))
        log(i_log, "Run_Time_HMS = {0}".format(str(datetime.timedelta(seconds=(time.time()-run_start)))))
        log(i_log, "End_Time = {0}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
        log(i_log, "Done = {0}".format(i))
    finally:
        # Close both logs even when one of the steps above raises.
        i_log.close()
        if c_log is not None:
            c_log.close()
Ejemplo n.º 8
0
def pcommand(c, log):
    '''Run a command and record it, followed by its output, in a log.
    :Parameters:
        - 'c': command to run
        - 'log': object whose write() receives the command, then the
          result's ``out`` and ``err``, each followed by a newline
    :Returns:
        - the object returned by ``command``
    '''
    log.write(c + "\n")
    result = command(c)
    for stream_name in ("out", "err"):
        log.write(getattr(result, stream_name) + "\n")
    return result
Ejemplo n.º 9
0
def pcommand(c, log):
    '''Write a command to a log, run it, then write what it produced.
    :Parameters:
        - 'c': command to run
        - 'log': object with a write() method; it receives the command and
          then the result's ``out`` and ``err``, one per line
    :Returns:
        - the object returned by ``command``
    '''
    log.write(c + "\n")
    outcome = command(c)
    for field in ("out", "err"):
        log.write(getattr(outcome, field) + "\n")
    return outcome
Ejemplo n.º 10
0
def run_config(l_len, l_order, l_lang, l_direct, l_score, l_smoothing, l_align,
               l_orient, l_model, i):
    '''Build the language models, then train, tune and test one configuration.

    Everything is written under "<archive_path>/<i>/": the language models in
    "lm", the Moses working files in "working", a summary log
    "<i>.ilog.txt" and "<i>.clog.txt", the log handed to pcommand for every
    shell command.

    :Parameters:
        - 'l_len': value logged as MaxPhraseLength
        - 'l_order': n-gram order given to build-lm.sh and the -lm options
        - 'l_lang': language part of the -reordering option
        - 'l_direct': directionality part of the -reordering option
        - 'l_score': value given to --score-options
        - 'l_smoothing': smoothing given to build-lm.sh (-s)
        - 'l_align': value given to -alignment
        - 'l_orient': orientation part of the -reordering option
        - 'l_model': model type part of the -reordering option
        - 'i': index of this configuration; names the output directory
    :Returns:
        - None
    '''
    i = str(i)
    run_start = time.time()
    lm_path = "{0}/{1}/lm".format(archive_path, i)
    working_path = "{0}/{1}/working".format(archive_path, i)

    c = command("mkdir {0}/{1}".format(archive_path, i))
    print(c.out)
    print(c.err)

    i_log = open("{0}/{1}/{1}.ilog.txt".format(archive_path, i), "w", 1)
    c_log = None
    try:
        c_log = open("{0}/{1}/{1}.clog.txt".format(archive_path, i), "w", 1)

        log(i_log, "i = {0}".format(i))
        log(
            i_log, "Start_Time = {0}".format(
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
        log(i_log, "Order = {0}".format(l_order))
        log(i_log, "Smoothing = {0}".format(l_smoothing))
        log(i_log, "ScoreOptions = {0}".format(l_score))
        log(i_log, "Alignment = {0}".format(l_align))
        log(i_log, "ReorderingModeltype = {0}".format(l_model))
        log(i_log, "ReorderingOrientation = {0}".format(l_orient))
        log(i_log, "ReorderingDirectionality = {0}".format(l_direct))
        log(i_log, "ReorderingLanguage = {0}".format(l_lang))
        log(i_log, "MaxPhraseLength = {0}".format(l_len))
        log(i_log, "")

        #Create language model
        lm_start = time.time()
        pcommand("mkdir {0}".format(lm_path), c_log)
        pcommand(
            "{0}/bin/add-start-end.sh < {1}/{2}.true.{3} > {4}/{2}.sb.{3}".
            format(irstlm_path, corpora_path, train_name, foreign, lm_path),
            c_log)
        pcommand(
            "{0}/bin/build-lm.sh -i {5}/{1}.sb.{4} -t {5}/tmp -p -n {2} -s {3} -o {5}/{1}.ilm.{4}.gz"
            .format(irstlm_path, train_name, l_order, l_smoothing, foreign,
                    lm_path), c_log)
        pcommand(
            "{0}/bin/compile-lm --text  {3}/{1}.ilm.{2}.gz {3}/{1}.arpa.{2}".
            format(irstlm_path, train_name, foreign, lm_path), c_log)
        # Read the ARPA file compile-lm just wrote: same language suffix,
        # not a fixed "es".
        pcommand(
            "{0}/bin/build_binary -i {3}/{1}.arpa.{2} {3}/{1}.blm.{2}".format(
                moses_path, train_name, foreign, lm_path), c_log)

        # Same pipeline again for the ".pos" corpus.
        pcommand(
            "{0}/bin/add-start-end.sh < {1}/{2}.pos.{3} > {4}/{2}.pos.sb.{3}".
            format(irstlm_path, corpora_path, train_name, foreign, lm_path),
            c_log)
        pcommand(
            "{0}/bin/build-lm.sh -i {5}/{1}.pos.sb.{4} -t {5}/tmp -p -n {2} -s {3} -o {5}/{1}.pos.ilm.{4}.gz"
            .format(irstlm_path, train_name, l_order, l_smoothing, foreign,
                    lm_path), c_log)
        pcommand(
            "{0}/bin/compile-lm --text  {3}/{1}.pos.ilm.{2}.gz {3}/{1}.pos.arpa.{2}"
            .format(irstlm_path, train_name, foreign, lm_path), c_log)
        pcommand(
            "{0}/bin/build_binary -i {3}/{1}.pos.arpa.{2} {3}/{1}.pos.blm.{2}".
            format(moses_path, train_name, foreign, lm_path), c_log)

        # Query the freshly built model once and keep the answer in the
        # summary log.
        o = pcommand(
            "echo 'Is this a Spanish sentance?' | {0}/bin/query {1}/{2}.blm.{3}".
            format(moses_path, lm_path, train_name, foreign), c_log)
        log(i_log, "")
        log(i_log, o.out)
        log(i_log, o.err)
        log(i_log, "LM_Time = {0}".format(str(time.time() - lm_start)))
        log(
            i_log, "LM_Time_HMS = {0}".format(
                str(datetime.timedelta(seconds=(time.time() - lm_start)))))

        #Train the model
        train_start = time.time()

        log(
            i_log, "Train_Start_Time = {0}".format(
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))

        pcommand("mkdir {0}".format(working_path), c_log)
        # NOTE(review): "2>&1 > file" sends stderr to the inherited stdout and
        # only stdout to the file; if both were meant to land in the file the
        # order would be "> file 2>&1". Left unchanged (same for mert below).
        pcommand(
            "{0}/scripts/training/train-model.perl -root-dir {15}/train -corpus {1}/{2}.factored.clean -f en -e {3} --score-options \'{4}\' -alignment {5} -reordering {6}-{7}-{8}-{9} -lm 0:{10}:{11}/{12}.blm.{13}:1 -lm 1:{10}:{11}/{12}.pos.blm.{13}:1 --translation-factors 0-0,1 -mgiza -mgiza-cpus {14} -external-bin-dir {0}/tools -cores {14} --parallel --parts 3 2>&1 > {15}/training.out"
            .format(moses_path, corpora_path, train_name, foreign, l_score,
                    l_align, l_model, l_orient, l_direct, l_lang, l_order,
                    lm_path, train_name, foreign, threads, working_path),
            c_log)
        # Measure training from its own start, not from the LM start.
        log(i_log, "Train_Time = {0}".format(str(time.time() - train_start)))
        log(
            i_log, "Train_Time_HMS = {0}".format(
                str(datetime.timedelta(seconds=(time.time() - train_start)))))
        print("trained")

        #Tune the model
        tune_start = time.time()
        log(
            i_log, "Tune_Start_Time = {0}".format(
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
        pcommand(
            "{0}/scripts/training/mert-moses.pl {1}/{2}.true.en {1}/{2}.true.{3} {0}/bin/moses  {4}/train/model/moses.ini --working-dir {4}/mert-work --mertdir {0}/bin/ 2>&1 > {4}/mert.out"
            .format(moses_path, corpora_path, tune_name, foreign,
                    working_path), c_log)
        log(i_log, "Tune_Time = {0}".format(str(time.time() - tune_start)))
        log(
            i_log, "Tune_Time_HMS = {0}".format(
                str(datetime.timedelta(seconds=(time.time() - tune_start)))))
        print("tuned")

        #Test the model
        test_start = time.time()
        log(
            i_log, "Test_Start_Time = {0}".format(
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
        pcommand(
            "{0}/scripts/training/filter-model-given-input.pl {3}/filtered-{1} {3}/mert-work/moses.ini {2}/{1}.true.en -Binarizer {0}/bin/processPhraseTable"
            .format(moses_path, test_name, corpora_path, working_path), c_log)
        pcommand(
            "{0}/bin/moses -f {1}/filtered-{3}/moses.ini  < {2}/{3}.true.en > {1}/{3}.translated.{4} 2> {1}/{3}.out"
            .format(moses_path, working_path, corpora_path, test_name,
                    foreign), c_log)
        c = pcommand(
            "{0}/scripts/generic/multi-bleu.perl -lc {1}/{2}.true.{4} < {3}/{2}.translated.{4}"
            .format(moses_path, corpora_path, test_name, working_path,
                    foreign), c_log)
        log(i_log, c.out)
        print("tested")
        log(i_log, "Test_Time = {0}".format(str(time.time() - test_start)))
        log(
            i_log, "Test_Time_HMS = {0}".format(
                str(datetime.timedelta(seconds=(time.time() - test_start)))))
        log(
            i_log, "Run_Time_HMS = {0}".format(
                str(datetime.timedelta(seconds=(time.time() - run_start)))))
        log(
            i_log, "End_Time = {0}".format(
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
        log(i_log, "Done = {0}".format(i))
    finally:
        # Close both logs even when one of the steps above raises.
        i_log.close()
        if c_log is not None:
            c_log.close()