def makeArgParser(configfilename="config.json"):
    """Build the command-line parser for the phone/letter ngram program.

    Args:
        configfilename: configuration file name; it is handed to
            ``load_config_for_command_line_help`` and is also the default
            value of the ``--config`` option.

    Returns:
        argparse.ArgumentParser: parser offering ``--config``,
        ``--language``, ``--corpus``, ``--datafolder`` and
        ``--maxwordtokens``.
    """
    # The helper returns four items; only the last one (text appended to
    # the help description) is used when building the parser.
    _language, _corpus, _datafolder, configtext = \
        load_config_for_command_line_help(configfilename)

    parser = argparse.ArgumentParser(
        # Adjacent string literals are joined verbatim, so the space that
        # separates "other" from "phonology-related" must be written out.
        description="Extracting phone/letter ngrams from a wordlist; "
                    "also other phonology-related stuff\n\n{}"
                    .format(configtext),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("--config", help="configuration filename",
                        type=str, default=configfilename)
    parser.add_argument("--language", help="Language name",
                        type=str, default=None)
    parser.add_argument("--corpus", help="Corpus file to use",
                        type=str, default=None)
    parser.add_argument("--datafolder", help="path of the data folder",
                        type=str, default=None)
    parser.add_argument("--maxwordtokens",
                        help="maximum number of word tokens;"
                             " if this is zero, then the program counts "
                             "all word tokens in the corpus",
                        type=int, default=0)
    return parser
def makeArgParser(configfilename="config.json"):
    """Build the command-line parser for the word segmentation program.

    Args:
        configfilename: configuration file name; it is handed to
            ``load_config_for_command_line_help`` and is also the default
            value of the ``--config`` option.

    Returns:
        argparse.ArgumentParser: parser offering ``--config``,
        ``--language``, ``--corpus``, ``--datafolder``, ``--cycles``,
        ``--candidates`` and ``--verbose``.
    """

    def _parse_bool(text):
        """Convert a command-line word such as "true" or "no" to a bool."""
        lowered = text.lower()
        if lowered in ("true", "t", "yes", "y", "1", "on"):
            return True
        # The empty string stays False: it was the only input that the
        # former ``type=bool`` converter turned into False.
        if lowered in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value (true/false), got {!r}".format(text))

    # The helper returns four items; only the last one (text appended to
    # the help description) is used when building the parser.
    _language, _corpus, _datafolder, configtext = \
        load_config_for_command_line_help(configfilename)

    parser = argparse.ArgumentParser(
        description="Word segmentation program.\n\n{}".format(configtext),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("--config", help="configuration filename",
                        type=str, default=configfilename)
    parser.add_argument("--language", help="Language name",
                        type=str, default=None)
    parser.add_argument("--corpus", help="Corpus file to use",
                        type=str, default=None)
    parser.add_argument("--datafolder", help="path of the data folder",
                        type=str, default=None)
    parser.add_argument("--cycles", help="number of cycles",
                        type=int, default=200)
    # Adjacent literals are joined verbatim, so the space before "per"
    # has to be part of one of them.
    parser.add_argument("--candidates",
                        help="number of candidates "
                             "per iteration",
                        type=int, default=25)
    # ``type=bool`` would treat every non-empty string (including
    # "False") as True, so an explicit converter is used instead.
    parser.add_argument("--verbose", help="verbose output",
                        type=_parse_bool, default=False)
    return parser
def makeArgParser(configfilename="config.json"):
    """Build the command-line parser for the word neighbors program.

    Args:
        configfilename: configuration file name; it is handed to
            ``load_config_for_command_line_help`` and is also the default
            value of the ``--config`` option.

    Returns:
        argparse.ArgumentParser: parser offering the common options
        (``--config``, ``--language``, ``--corpus``, ``--datafolder``)
        plus the numeric and boolean options of this program.
    """

    def _parse_bool(text):
        """Convert a command-line word such as "true" or "no" to a bool."""
        lowered = text.lower()
        if lowered in ("true", "t", "yes", "y", "1", "on"):
            return True
        # The empty string stays False: it was the only input that the
        # former ``type=bool`` converter turned into False.
        if lowered in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value (true/false), got {!r}".format(text))

    # The helper returns four items; only the last one (text appended to
    # the help description) is used when building the parser.
    _language, _corpus, _datafolder, configtext = (
        load_config_for_command_line_help(configfilename)
    )

    parser = argparse.ArgumentParser(
        description="This program computes word neighbors.\n\n{}".format(
            configtext),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument("--config", help="configuration filename",
                        type=str, default=configfilename)
    parser.add_argument("--language", help="Language name",
                        type=str, default=None)
    parser.add_argument("--corpus", help="Corpus file to use",
                        type=str, default=None)
    parser.add_argument("--datafolder", help="path of the data folder",
                        type=str, default=None)

    parser.add_argument("--maxwordtypes",
                        help="Number of word types to handle",
                        type=int, default=1000)
    parser.add_argument("--nNeighbors", help="Number of neighbors",
                        type=int, default=9)
    parser.add_argument("--nEigenvectors", help="Number of eigenvectors",
                        type=int, default=11)
    parser.add_argument(
        "--mincontexts",
        help="Minimum number of times that "
             "a word occurs in a context; "
             "also minimum number of neighbors for a word that share "
             "a context (for WordToSharedContextsOfNeighbors)",
        type=int,
        default=3,
    )

    # ``type=bool`` would treat every non-empty string (including
    # "False") as True, which made it impossible to switch off
    # --usesigtransforms with a readable value; use a real converter.
    parser.add_argument("--wordtocontexts",
                        help="create the WordToContexts dict?",
                        type=_parse_bool, default=False)
    parser.add_argument("--contexttowords",
                        help="create the ContextToWords dict?",
                        type=_parse_bool, default=False)
    parser.add_argument("--usesigtransforms",
                        help="use signature transforms?",
                        type=_parse_bool, default=True)
    return parser
def makeArgParser(configfilename="config.json"):
    """Create the argument parser of the word neighbor graph program.

    Args:
        configfilename: configuration file name, forwarded to
            ``load_config_for_command_line_help`` and used as the default
            of ``--config``.

    Returns:
        argparse.ArgumentParser: parser with the ``--config``,
        ``--language``, ``--corpus`` and ``--datafolder`` options.
    """
    # Four values come back from the helper; the last is the text that
    # is appended to the program description.
    _lang, _corp, _folder, help_suffix = load_config_for_command_line_help(
        configfilename)

    summary = "This program creates word neighbor graphs.\n\n{}".format(
        help_suffix)
    arg_parser = argparse.ArgumentParser(
        description=summary,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # (flag, help text, default) -- every option here is a string option.
    string_options = (
        ("--config", "configuration filename", configfilename),
        ("--language", "Language name", None),
        ("--corpus", "Corpus file to use", None),
        ("--datafolder", "path of the data folder", None),
    )
    for flag, help_text, default_value in string_options:
        arg_parser.add_argument(flag, help=help_text, type=str,
                                default=default_value)

    return arg_parser
def makeArgParser(configfilename="config.json"):
    """Create the argument parser of the morphological signature program.

    Args:
        configfilename: configuration file name, forwarded to
            ``load_config_for_command_line_help`` and used as the default
            of ``--config``.

    Returns:
        argparse.ArgumentParser: parser with the common string options
        and the integer options ``--minstem``, ``--maxaffix``,
        ``--minsig`` and ``--maxwordtokens``.
    """
    # Four values come back from the helper; the last is the text that
    # is appended to the program description.
    _lang, _corp, _folder, help_suffix = load_config_for_command_line_help(
        configfilename)

    summary = "This program computes morphological signatures.\n\n{}".format(
        help_suffix)
    arg_parser = argparse.ArgumentParser(
        description=summary,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # One row per option: (flag, help text, value type, default).
    option_table = (
        ("--config", "configuration filename", str, configfilename),
        ("--language", "Language name", str, None),
        ("--corpus", "Corpus file to use", str, None),
        ("--datafolder", "path of the data folder", str, None),
        ("--minstem",
         "Minimum stem length; "
         "usually from 2 to 5, where a smaller number means "
         "you can find shorter stems although the program "
         "may run a lot slower",
         int, 4),
        ("--maxaffix",
         "Maximum affix length; "
         "usually from 1 to 5, where a larger number means "
         "you can find longer affixes",
         int, 3),
        ("--minsig",
         "Minimum number of signature use; "
         "a small number like 5 is pretty much the smallest "
         "to use in order to filter spurious signatures; may "
         "try larger numbers like 10 or 20 and so forth",
         int, 5),
        ("--maxwordtokens",
         "maximum number of word tokens;"
         " if this is zero, then the program counts "
         "all word tokens in the corpus",
         int, 0),
    )
    for flag, help_text, value_type, default_value in option_table:
        arg_parser.add_argument(flag, help=help_text, type=value_type,
                                default=default_value)

    return arg_parser
def makeArgParser(configfilename="config.json"):
    """Create the argument parser of the trie computation program.

    Args:
        configfilename: configuration file name, forwarded to
            ``load_config_for_command_line_help`` and used as the default
            of ``--config``.

    Returns:
        argparse.ArgumentParser: parser with the common string options
        and the integer options ``--minstem``, ``--minaffix``,
        ``--minsize`` and ``--maxwordtokens``.
    """
    # Four values come back from the helper; the last is the text that
    # is appended to the program description.
    _lang, _corp, _folder, help_suffix = load_config_for_command_line_help(
        configfilename)

    summary = "The program computes tries given corpus data.\n\n{}".format(
        help_suffix)
    arg_parser = argparse.ArgumentParser(
        description=summary,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # One row per option: (flag, help text, value type, default).
    option_table = (
        ("--config", "configuration filename", str, configfilename),
        ("--language", "Language name", str, None),
        ("--corpus", "Corpus file to use", str, None),
        ("--datafolder", "relative path of the data folder", str, None),
        ("--minstem",
         "Minimum stem length; "
         "usually from 2 to 5, where a smaller number means "
         "you can find shorter stems although the program "
         "may run a lot slower",
         int, 4),
        ("--minaffix", "Minimum affix length", int, 1),
        ("--minsize",
         "Minimum size of "
         "successors/predecessors for output",
         int, 3),
        ("--maxwordtokens",
         "maximum number of word tokens;"
         " if this is zero, then the program counts "
         "all word tokens in the corpus",
         int, 0),
    )
    for flag, help_text, value_type, default_value in option_table:
        arg_parser.add_argument(flag, help=help_text, type=value_type,
                                default=default_value)

    return arg_parser
def makeArgParser(configfilename="config.json"):
    """Return the command-line parser for the word neighbor graph program.

    Args:
        configfilename: configuration file name; given to
            ``load_config_for_command_line_help`` and shown as the
            default of ``--config``.

    Returns:
        argparse.ArgumentParser: parser that understands ``--config``,
        ``--language``, ``--corpus`` and ``--datafolder``.
    """
    # Only the fourth returned value (extra help text) is needed here.
    _unused_language, _unused_corpus, _unused_datafolder, extra_help = \
        load_config_for_command_line_help(configfilename)

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="This program creates word neighbor graphs.\n\n{}"
                    .format(extra_help),
    )

    def add_text_option(flag, description, fallback):
        """Register one string-valued option on the parser."""
        cli.add_argument(flag, help=description, type=str, default=fallback)

    add_text_option("--config", "configuration filename", configfilename)
    add_text_option("--language", "Language name", None)
    add_text_option("--corpus", "Corpus file to use", None)
    add_text_option("--datafolder", "path of the data folder", None)

    return cli
def makeArgParser(configfilename="config.json"):
    """Build the command-line parser for the phone/letter ngram program.

    Args:
        configfilename: configuration file name; it is handed to
            ``load_config_for_command_line_help`` and is also the default
            value of the ``--config`` option.

    Returns:
        argparse.ArgumentParser: parser offering ``--config``,
        ``--language``, ``--corpus``, ``--datafolder`` and
        ``--maxwordtokens``.
    """
    # The helper returns four items; only the last one (text appended to
    # the help description) is used when building the parser.
    _language, _corpus, _datafolder, configtext = \
        load_config_for_command_line_help(configfilename)

    # Adjacent string literals are joined verbatim, so the space that
    # separates "other" from "phonology-related" must be written out.
    description = ("Extracting phone/letter ngrams from a wordlist; "
                   "also other phonology-related stuff\n\n{}"
                   .format(configtext))

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("--config", help="configuration filename",
                        type=str, default=configfilename)
    parser.add_argument("--language", help="Language name",
                        type=str, default=None)
    parser.add_argument("--corpus", help="Corpus file to use",
                        type=str, default=None)
    parser.add_argument("--datafolder", help="path of the data folder",
                        type=str, default=None)
    parser.add_argument("--maxwordtokens",
                        help="maximum number of word tokens;"
                             " if this is zero, then the program counts "
                             "all word tokens in the corpus",
                        type=int, default=0)
    return parser
def makeArgParser(configfilename="config.json"):
    """Return the command-line parser for the trie computation program.

    Args:
        configfilename: configuration file name; given to
            ``load_config_for_command_line_help`` and shown as the
            default of ``--config``.

    Returns:
        argparse.ArgumentParser: parser that understands the common
        string options plus ``--minstem``, ``--minaffix``, ``--minsize``
        and ``--maxwordtokens``.
    """
    # Only the fourth returned value (extra help text) is needed here.
    _unused_language, _unused_corpus, _unused_datafolder, extra_help = \
        load_config_for_command_line_help(configfilename)

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="The program computes tries given corpus data.\n\n{}"
                    .format(extra_help),
    )

    def add_option(flag, description, kind, fallback):
        """Register one option with its help, value type and default."""
        cli.add_argument(flag, help=description, type=kind, default=fallback)

    # String-valued options.
    add_option("--config", "configuration filename", str, configfilename)
    add_option("--language", "Language name", str, None)
    add_option("--corpus", "Corpus file to use", str, None)
    add_option("--datafolder", "relative path of the data folder", str, None)

    # Integer-valued options.
    add_option(
        "--minstem",
        "Minimum stem length; "
        "usually from 2 to 5, where a smaller number means "
        "you can find shorter stems although the program "
        "may run a lot slower",
        int, 4)
    add_option("--minaffix", "Minimum affix length", int, 1)
    add_option(
        "--minsize",
        "Minimum size of "
        "successors/predecessors for output",
        int, 3)
    add_option(
        "--maxwordtokens",
        "maximum number of word tokens;"
        " if this is zero, then the program counts "
        "all word tokens in the corpus",
        int, 0)

    return cli
def makeArgParser(configfilename="config.json"):
    """Build the command-line parser for the word neighbors program.

    Args:
        configfilename: configuration file name; it is handed to
            ``load_config_for_command_line_help`` and is also the default
            value of the ``--config`` option.

    Returns:
        argparse.ArgumentParser: parser offering the common options
        (``--config``, ``--language``, ``--corpus``, ``--datafolder``)
        plus the numeric and boolean options of this program.
    """

    def _parse_bool(text):
        """Convert a command-line word such as "true" or "no" to a bool."""
        lowered = text.lower()
        if lowered in ("true", "t", "yes", "y", "1", "on"):
            return True
        # The empty string stays False: it was the only input that the
        # former ``type=bool`` converter turned into False.
        if lowered in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value (true/false), got {!r}".format(text))

    # The helper returns four items; only the last one (text appended to
    # the help description) is used when building the parser.
    _language, _corpus, _datafolder, configtext = \
        load_config_for_command_line_help(configfilename)

    parser = argparse.ArgumentParser(
        description="This program computes word neighbors.\n\n{}".format(
            configtext),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("--config", help="configuration filename",
                        type=str, default=configfilename)
    parser.add_argument("--language", help="Language name",
                        type=str, default=None)
    parser.add_argument("--corpus", help="Corpus file to use",
                        type=str, default=None)
    parser.add_argument("--datafolder", help="path of the data folder",
                        type=str, default=None)

    parser.add_argument("--maxwordtypes",
                        help="Number of word types to handle",
                        type=int, default=1000)
    parser.add_argument("--nNeighbors", help="Number of neighbors",
                        type=int, default=9)
    parser.add_argument("--nEigenvectors", help="Number of eigenvectors",
                        type=int, default=11)
    parser.add_argument(
        "--mincontexts",
        help="Minimum number of times that "
             "a word occurs in a context; "
             "also minimum number of neighbors for a word that share "
             "a context (for WordToSharedContextsOfNeighbors)",
        type=int, default=3)

    # ``type=bool`` would treat every non-empty string (including
    # "False") as True, which made it impossible to switch off
    # --usesigtransforms with a readable value; use a real converter.
    parser.add_argument("--wordtocontexts",
                        help="create the WordToContexts dict?",
                        type=_parse_bool, default=False)
    parser.add_argument("--contexttowords",
                        help="create the ContextToWords dict?",
                        type=_parse_bool, default=False)
    parser.add_argument("--usesigtransforms",
                        help="use signature transforms?",
                        type=_parse_bool, default=True)
    return parser