def run_wb_sim(language, viol, mgain, nconstraints, mb, gam, parameters, reducemem):
    '''
    This learning simulation is described in Gouskova and Gallagher (NLLT).
    The learner starts with a baseline grammar; if this grammar contains
    placeholder trigrams, it creates projections for each distinct trigram and
    runs a final simulation with those projections available.
    '''
    basepath = os.getcwd().split('code')[0]
    simfunc.cleanUpWorkdir(basepath)
    if parameters:
        params.move_params(os.path.join(basepath, 'data', language, 'params.txt'))
        viol, mgain, nconstraints, gamma = params.read_params()
    else:
        params.makeParams(consize=nconstraints, violable=viol, mingain=mgain,
                          gamma=gam, predefault=False)
    simfunc.makeSimFiles(language)
    # baseline simulation
    simfunc.runBaselineSim(basepath, reducemem=reducemem)
    # analyze the resulting grammar.txt file and make a projection for each
    # wb-mentioning (placeholder) constraint
    simfunc.makeProjection(basepath, 'wb', mb)
    if len(os.listdir('projections')) == 0:
        print('\nNo projections were found because there were no placeholder '
              'constraints in the baseline grammar.')
    else:
        simfunc.runCustomSim(reducemem=reducemem, simtype='wb')
    vio = viol[0:2]
    wrapstring = os.path.join('sims', language.replace(os.sep, "_")) + "_" + '_'.join(
        ['wb', vio, 'gain' + str(mgain), 'con' + str(nconstraints)])
    return simfunc.wrapSims(wrapstring, ret=True)
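
# The call below is a hypothetical usage sketch, not a recipe from the source:
# the language name and the numeric settings are illustrative assumptions, and
# 'english_onsets' stands in for whatever subfolder of data/ holds the training
# corpus. With parameters=False the settings are written out via
# params.makeParams(); with parameters=True they are read from
# data/<language>/params.txt instead.
#
#   run_wb_sim(language='english_onsets', viol='yes', mgain=100.0,
#              nconstraints=300, mb=False, gam=1.0, parameters=False,
#              reducemem=True)
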
def run_handmade_projection_sim(language, viol, mgain, nconstraints, gam, parameters, feature, reducemem):
    '''
    This either creates a projection file based on the value of "feature"
    (e.g., "-son") or runs a simulation with a custom projection file. To do
    the latter, supply a full path to the projection file as the "feature"
    argument.
    '''
    basepath = os.getcwd().split('code')[0]
    lgfullpath = os.path.join(basepath, 'data', language)
    simfunc.cleanUpWorkdir(basepath)
    simfunc.makeSimFiles(language)
    if parameters:
        params.move_params(os.path.join(lgfullpath, 'params.txt'))
        viol, mgain, nconstraints, gam = params.read_params()
    else:
        params.makeParams(consize=nconstraints, violable=viol, mingain=mgain, gamma=gam)
    simfunc.handmakeProjection(basepath, feature)
    simfunc.runCustomSim(feature, reducemem=reducemem)
    if 'output_baseline' in os.listdir(basepath):
        shutil.rmtree(os.path.join(basepath, 'output_baseline'))
    simfunc.wrapSims(os.path.join('sims', '_'.join([language.replace(os.sep, "_"), 'custom'])),
                     cust=True)
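
# Two hypothetical ways this function might be used (argument values are
# illustrative assumptions, not defaults from the source). The first projects
# a feature bundle such as "-son"; the second points the 'feature' argument at
# an existing projection file, per the docstring above.
#
#   run_handmade_projection_sim('english_onsets', 'yes', 100.0, 300, 1.0,
#                               parameters=False, feature='-son', reducemem=True)
#
#   run_handmade_projection_sim('english_onsets', 'yes', 100.0, 300, 1.0,
#                               parameters=False,
#                               feature='/full/path/to/my_projection.txt',
#                               reducemem=True)
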
def run_mbsublex_sim(language, viol, mgain, nconstraints, mb, gam, parameters, reducemem):
    '''
    This simulation needs a corpus with morpheme boundaries. It starts by
    running a baseline simulation on the corpus. Then, if it finds constraints
    in the resulting grammar that mention [-mb] (see the mbsublex module), it
    splits the learning data into individual morphemes, one morph per line,
    and uses that as a new baseline data set. If it locates any placeholder
    trigrams in that subset, it makes a projection from them and then runs a
    projection simulation on the morpheme sublexicon, and on the whole corpus.
    '''
    basepath = os.getcwd().split('code')[0]
    maxentdir = os.path.join(basepath, 'maxent2', 'temp')
    dircontent = os.listdir(maxentdir)
    vio = viol[0:2]
    wrapstring = os.path.join('sims', language.replace(os.sep, "_")) + "_" + '_'.join(
        ['wb', 'mbsublex', vio, 'gain' + str(mgain), 'con' + str(nconstraints)])
    if 'output_baseline' not in dircontent:
        simfunc.cleanUpWorkdir(basepath)
        if parameters:
            params.move_params(os.path.join(basepath, 'data', language, 'params.txt'))
            viol, mgain, nconstraints, gamma = params.read_params()
        else:
            params.makeParams(consize=nconstraints, violable=viol, mingain=mgain,
                              gamma=gam, predefault=False)
        simfunc.makeSimFiles(language)
        print('running the baseline simulation using original training corpus')
        # copies grammar, projections, tableau, and maxent output to
        # maxent2/temp/output_baseline
        simfunc.runBaselineSim(basepath, reducemem=reducemem, rt_output_baseline=False)
        mbsublex.move_sublex_files(kind='output_baseline')
    if 'output_mbsublex_baseline' not in dircontent:
        print("Baseline simulation found at " + os.path.join(maxentdir, 'output_baseline'))
        # analyze the resulting grammar.txt file for [-mb] constraints, and make projections
        found_mb = mbsublex.search_grammar_for_mb()
        if found_mb:
            print('Making a sublexicon with one morph per line')
            # renames the current corpus to 'orig_corpus.txt' and creates a new
            # 'corpus.txt' consisting of just morphologically simple words
            mbsublex.make_freewd_sublexicon()
            print('Running a new baseline simulation using a sublexicon as training data')
            if parameters:
                params.scale_params(inpath=os.path.join(basepath, 'data', language, 'params.txt'),
                                    multiply_by=0.01, keepconsize=True)
            else:
                params.scale_params(viol, mgain, nconstraints, gam, 0.01, True)  # last one is keepconsize
            simfunc.runBaselineSim(basepath, reducemem=reducemem, rt_output_baseline=False)
            mbsublex.move_sublex_files(kind="output_mbsublex_baseline")
        else:
            print('Did not find any *X-mb-Y trigrams. Quitting now.')
            return mbsublex.wrapSims(wrapstring, basepath, ret=True)
    if 'output_mbsublex' not in dircontent:
        print("Sublexicon baseline simulation found at " + os.path.join(maxentdir, 'output_mbsublex'))
        mbsublex.makeProjection(basepath, 'wb', mb=True)
        print('projections found--running a projection simulation on morph sublexicon')
        simfunc.runCustomSim(reducemem=reducemem, simtype='wb')
        mbsublex.move_sublex_files(kind='output_mbsublex')
    if 'output_final' not in dircontent:
        mbsublex.rename_corpus_back()
        print('now running a projection simulation on the original training corpus')
        if parameters:
            params.scale_params(inpath=os.path.join(basepath, 'data', language, 'params.txt'),
                                multiply_by=1, keepconsize=True)
        else:
            params.scale_params(viol, mgain, nconstraints, gam, 10, True)
        simfunc.runCustomSim(reducemem=reducemem, simtype='wb')
        mbsublex.move_sublex_files(kind='output_final')
    print('done!')
    try:
        return mbsublex.wrapSims(wrapstring, basepath=maxentdir, ret=True)
    except Exception:
        print("The simulation failed for some reason. Check the contents of "
              + maxentdir + " to help with debugging.")
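
# A hypothetical end-to-end run (argument values are illustrative assumptions;
# the training corpus must mark morpheme boundaries so that *X-mb-Y constraints
# can be found in the baseline grammar):
#
#   run_mbsublex_sim('russian_nouns', 'yes', 100.0, 300, mb=True, gam=1.0,
#                    parameters=False, reducemem=True)
#
# Because each stage first checks maxent2/temp for an existing output_*
# directory, an interrupted run can be restarted and will skip the stages that
# already produced output.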