# required imports for this section (assuming simfunc, params, and mbsublex are
# local modules importable under these names; they may already be imported at
# the top of the module)
import os
from subprocess import CalledProcessError

import mbsublex
import params
import simfunc


def run_agree_disagree_sim(language, viol, mgain, nconstraints, mb, gam, parameters, reducemem):
    '''
    this is work in progress and is not described anywhere yet.
    it makes a set of constraints on the basis of the natural class structure
    of the language. they have the form +f +f, +f [+wb] +f, and +f [-wb] +f,
    and ditto for every combination of + and - for f (in other words, it makes
    agree and disagree constraints for every feature and natural class in the
    language). it then runs a simulation with this premade constraint set.
    the early results in testing this have not been encouraging.
    '''
    basepath = os.getcwd().split('code')[0]
    lgfullpath = os.path.join(basepath, 'data', language)
    simfunc.cleanUpWorkdir(basepath)
    simfunc.makeSimFiles(lgfullpath, ag_disag=True)
    if parameters:
        params.move_params(lgfullpath, 'params.txt')
        viol, mgain, nconstraints, gam = params.read_params()
    else:
        params.makeParams(consize=nconstraints,
                          violable=viol,
                          mingain=mgain,
                          gamma=gam,
                          ag_disag=True)
    simfunc.runBaselineSim(basepath, reducemem=reducemem)
    wrapstring = os.path.join(
        'sims', '_'.join([
            language.replace(os.sep, "_"), 'baseline', 'AG', viol[:2],
            'gain', str(mgain), 'ncons', str(nconstraints)
        ]))
    return simfunc.wrapSims(wrapstring, ret=True)
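# A minimal usage sketch (illustrative only, not part of the module): the corpus
# path and parameter values below are hypothetical and assume a corpus directory
# under data/<language> with the usual learner input files.
#
#   run_agree_disagree_sim('english/onsets', viol='yes', mgain='0.001',
#                          nconstraints='800', mb=False, gam=100,
#                          parameters=False, reducemem=True)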
def run_baseline_sim(language, viol, mgain, nconstraints, mb, gam, parameters, reducemem):
    '''
    this function runs the baseline simulation with a default (segmental) projection.
    if it does not succeed, it does not fail gracefully, so be forewarned.
    '''
    basepath = os.getcwd().split('code')[0]
    lgfullpath = os.path.join(basepath, 'data', language)
    simfunc.cleanUpWorkdir(basepath)
    if parameters:
        params.move_params(os.path.join(lgfullpath, 'params.txt'))
        viol, mgain, nconstraints, gam = params.read_params()
    else:
        params.makeParams(consize=nconstraints,
                          violable=viol,
                          mingain=mgain,
                          gamma=gam,
                          predefault=False)
    simfunc.makeSimFiles(lgfullpath)
    try:
        simfunc.runBaselineSim(basepath, reducemem=reducemem)
        wrapstring = os.path.join(
            'sims',
            language.replace(os.sep, "_") + '_baseline' + '_gain' +
            str(mgain) + '_con' + str(nconstraints))
        simfunc.wrapSims(wrapstring)
    except CalledProcessError:
        print("Done")
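# A usage sketch with hypothetical argument values: the function expects a corpus
# directory under data/<language>. With parameters=True, learner settings are read
# from a params.txt file in that directory instead of the values passed in.
#
#   run_baseline_sim('english/onsets', viol='yes', mgain='0.001',
#                    nconstraints='800', mb=False, gam=100,
#                    parameters=False, reducemem=True)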
def run_wb_sim(language, viol, mgain, nconstraints, mb, gam, parameters, reducemem):
    '''
    this learning simulation is described in Gouskova and Gallagher (NLLT).
    The learner starts with a baseline grammar; if this grammar contains
    placeholder trigrams, it creates projections for each distinct trigram
    and runs a final simulation with those projections available.
    '''
    basepath = os.getcwd().split('code')[0]
    simfunc.cleanUpWorkdir(basepath)
    if parameters:
        params.move_params(
            os.path.join(basepath, 'data', language, 'params.txt'))
        viol, mgain, nconstraints, gamma = params.read_params()
    else:
        params.makeParams(consize=nconstraints,
                          violable=viol,
                          mingain=mgain,
                          gamma=gam,
                          predefault=False)
    simfunc.makeSimFiles(language)
    # baseline simulation
    simfunc.runBaselineSim(basepath, reducemem=reducemem)
    # analyze the resulting grammar.txt file; make a projection for each
    # wb-mentioning constraint
    simfunc.makeProjection(basepath, 'wb', mb)
    if len(os.listdir('projections')) == 0:
        print('\nNo projections were found because there were no placeholder '
              'constraints in the baseline grammar.')
    else:
        simfunc.runCustomSim(reducemem=reducemem, simtype='wb')
    vio = viol[0:2]
    wrapstring = os.path.join(
        'sims',
        language.replace(os.sep, "_")) + "_" + '_'.join(
            ['wb', vio, 'gain' + str(mgain), 'con' + str(nconstraints)])
    return simfunc.wrapSims(wrapstring, ret=True)
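# A usage sketch, assuming a hypothetical corpus at data/english/onsets. The mb
# flag is passed through to simfunc.makeProjection (presumably controlling whether
# morpheme boundaries figure on the projections); wrapped results end up in a
# sims/... folder named after the language, gain, and constraint settings.
#
#   results = run_wb_sim('english/onsets', viol='yes', mgain='0.001',
#                        nconstraints='800', mb=False, gam=100,
#                        parameters=False, reducemem=True)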
def run_mbsublex_sim(language, viol, mgain, nconstraints, mb, gam, parameters, reducemem):
    '''
    this simulation needs a corpus with morpheme boundaries.
    it starts by running a baseline simulation on the corpus. then, if it finds
    constraints in the resulting grammar that mention [-mb] (see the mbsublex
    module), it splits the learning data into individual morphemes, one morph
    per line, and uses that as a new baseline data set. if it locates any
    placeholder trigrams in that subset, it makes a projection from them, and
    then it runs a projection simulation on the morpheme sublexicon and on the
    whole corpus.
    '''
    basepath = os.getcwd().split('code')[0]
    maxentdir = os.path.join(basepath, 'maxent2', 'temp')
    dircontent = os.listdir(maxentdir)
    vio = viol[0:2]
    wrapstring = os.path.join(
        'sims',
        language.replace(os.sep, "_")) + "_" + '_'.join([
            'wb', 'mbsublex', vio, 'gain' + str(mgain),
            'con' + str(nconstraints)
        ])
    if not 'output_baseline' in dircontent:
        simfunc.cleanUpWorkdir(basepath)
        if parameters:
            params.move_params(
                os.path.join(basepath, 'data', language, 'params.txt'))
            viol, mgain, nconstraints, gamma = params.read_params()
        else:
            params.makeParams(consize=nconstraints,
                              violable=viol,
                              mingain=mgain,
                              gamma=gam,
                              predefault=False)
        simfunc.makeSimFiles(language)
        print('running the baseline simulation using the original training corpus')
        simfunc.runBaselineSim(basepath,
                               reducemem=reducemem,
                               rt_output_baseline=False)
        # copies grammar, projections, tableau, and maxent output to maxent2/temp/output_baseline
        mbsublex.move_sublex_files(kind='output_baseline')
    if not 'output_mbsublex_baseline' in dircontent:
        print("Baseline simulation found at " +
              os.path.join(maxentdir, 'output_baseline'))
        # analyze the resulting grammar.txt file for [-mb] constraints, and make projections
        found_mb = mbsublex.search_grammar_for_mb()
        if found_mb:
            print('Making a sublexicon with one morph per line')
            # renames the current corpus 'orig_corpus.txt' and creates a new 'corpus.txt'
            # that consists of just morphologically simple words
            mbsublex.make_freewd_sublexicon()
            print('Running a new baseline simulation using the sublexicon as training data')
            if parameters:
                params.scale_params(
                    inpath=os.path.join(basepath, 'data', language, 'params.txt'),
                    multiply_by=0.01,
                    keepconsize=True)
            else:
                # the last argument is keepconsize
                params.scale_params(viol, mgain, nconstraints, gam, 0.01, True)
            simfunc.runBaselineSim(basepath,
                                   reducemem=reducemem,
                                   rt_output_baseline=False)
            mbsublex.move_sublex_files(kind="output_mbsublex_baseline")
        else:
            print('Did not find any *X-mb-Y trigrams. Quitting now.')
            return mbsublex.wrapSims(wrapstring, basepath, ret=True)
    if not 'output_mbsublex' in dircontent:
        print("Sublexicon baseline simulation found at " +
              os.path.join(maxentdir, 'output_mbsublex'))
        mbsublex.makeProjection(basepath, 'wb', mb=True)
        print('projections found--running a projection simulation on the morph sublexicon')
        simfunc.runCustomSim(reducemem=reducemem, simtype='wb')
        mbsublex.move_sublex_files(kind='output_mbsublex')
    if not 'output_final' in dircontent:
        mbsublex.rename_corpus_back()
        print('now running a projection simulation on the original training corpus')
        if parameters:
            params.scale_params(
                inpath=os.path.join(basepath, 'data', language, 'params.txt'),
                multiply_by=1,
                keepconsize=True)
        else:
            params.scale_params(viol, mgain, nconstraints, gam, 10, True)
        simfunc.runCustomSim(reducemem=reducemem, simtype='wb')
        mbsublex.move_sublex_files(kind='output_final')
    print('done!')
    try:
        return mbsublex.wrapSims(wrapstring, basepath=maxentdir, ret=True)
    except Exception:
        print("The simulation failed for some reason. Check the contents of " +
              maxentdir + " to help with debugging.")
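# A usage sketch with hypothetical values: the corpus at data/<language> must be
# annotated with morpheme boundaries. The function checks maxent2/temp for existing
# output_* folders and skips any stage whose output is already there, so an
# interrupted run can pick up where it left off.
#
#   results = run_mbsublex_sim('russian/morphs', viol='yes', mgain='0.001',
#                              nconstraints='800', mb=True, gam=100,
#                              parameters=False, reducemem=True)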