def makeSimFiles(language,
                 outpath=os.path.join(os.getcwd().split('code')[0], 'maxent2',
                                      'temp'),
                 testDataToUse=1 / 5,
                 predefault=False,
                 ag_disag=False):
    """
    Prepare the input files used by the command line learner for one language.

    The input directory is ``<root>/data/<language>``, where ``<root>`` is the
    part of the current working directory that precedes the first occurrence
    of ``'code'``.

    If the input directory contains ``TestingData.txt``, both the testing and
    the learning data are converted with ``fix_input_files.fixDataFile``.
    Otherwise only the learning data is converted, and
    ``datasampler.makeRandomTestFile`` is asked to split ``corpus.txt`` into
    ``test.txt`` (the withheld sample) and ``subcorpus.txt`` (the remainder);
    ``subcorpus.txt`` then replaces ``corpus.txt``.

    Args:
        language: name of the sub-directory of ``data`` holding the input
            files for the language.
        outpath: directory in which the feature file and the sampled
            ``corpus.txt`` / ``test.txt`` files are placed. NOTE: the default
            is computed once, when this function is defined, from the working
            directory at that moment.
        testDataToUse: amount of the learning data to withhold as test data
            when no ``TestingData.txt`` is supplied; passed unchanged to
            ``datasampler.makeRandomTestFile`` (default 1/5).
        predefault: if true, also call ``dflt_grammar.makeDefGramFile`` on the
            input directory (the "preselection" option).
        ag_disag: if true, also call ``agree_disagree.make_gram_file``.
    """
    inpath = os.path.join(os.getcwd().split('code')[0], 'data', language)
    fix_input_files.fixFeatureFile(inpath, outpath)

    # NOTE(review): fixDataFile is never given `outpath`; presumably it writes
    # into the same directory as the default `outpath` -- confirm that a
    # custom `outpath` is honoured.
    if "TestingData.txt" in os.listdir(inpath):
        # A dedicated test set was supplied: convert both files as they are.
        fix_input_files.fixDataFile(inpath, typ='test')
        fix_input_files.fixDataFile(inpath, typ='learning')
    else:
        # No test set: convert the learning data (LearningData.txt ->
        # corpus.txt), then withhold a random sample of it as test data.
        fix_input_files.fixDataFile(inpath, typ='learning')
        corpath = os.path.join(outpath, 'corpus.txt')
        testdatapath = os.path.join(outpath, 'test.txt')
        newlearndatapath = os.path.join(outpath, 'subcorpus.txt')
        datasampler.makeRandomTestFile(corpath, testdatapath, newlearndatapath,
                                       testDataToUse)
        # Swap the reduced learning set in for the full corpus in one step.
        # os.replace overwrites the destination on every platform, so the old
        # corpus.txt is not deleted unless the new one is actually in place.
        os.replace(newlearndatapath, corpath)

    # Optional extras requested by the caller.
    if predefault:
        dflt_grammar.makeDefGramFile(inpath)
    if ag_disag:
        agree_disagree.make_gram_file()
def copyTestFiles(grammarpath, testfilepath):
    """
    Stage the files needed to test an existing grammar in the maxent directory.

    The maxent directory is ``<root>/maxent2/temp``, where ``<root>`` is the
    part of the current working directory that precedes the first occurrence
    of ``'code'``. The following is done, in order:

    * the feature file and the test data found next to ``testfilepath`` are
      processed with ``fix_input_files.fixFeatureFile`` / ``fixDataFile``
      (presumably producing features.txt and test.txt in the maxent
      directory -- confirm against fix_input_files);
    * ``params.txt`` is written there, naming test.txt, grammar.txt,
      projections.txt and features.txt;
    * the grammar file is copied there as ``grammar.txt``, and the
      ``projections.txt`` sitting next to it is copied alongside.

    Args:
        grammarpath: path to the grammar.txt file to test.
        testfilepath: path to the TestingData.txt file to test it on.
    """
    project_root = os.getcwd().split('code')[0]
    staging_dir = os.path.join(project_root, 'maxent2', 'temp')

    # Everything before the file name, i.e. the directory holding the test
    # data (trailing separator included).
    source_dir = testfilepath.split('TestingData.txt')[0]
    fix_input_files.fixFeatureFile(source_dir, staging_dir)
    fix_input_files.fixDataFile(source_dir, typ='test')

    # One tab-separated "-option<TAB>file" entry per line, no final newline.
    param_text = ('-test\ttest.txt\n'
                  '-grammar\tgrammar.txt\n'
                  '-projections\tprojections.txt\n'
                  '-features\tfeatures.txt')
    with open(os.path.join(staging_dir, 'params.txt'), 'w',
              encoding='utf-8') as params_file:
        params_file.write(param_text)

    shutil.copy(grammarpath, os.path.join(staging_dir, 'grammar.txt'))

    # projections.txt is expected to live in the same directory as the grammar.
    grammar_dir = grammarpath.split('grammar.txt')[0]
    shutil.copy(os.path.join(grammar_dir, 'projections.txt'),
                os.path.join(staging_dir, 'projections.txt'))