Example #1
0
def makeArgParser(configfilename="config.json"):
    """Build the command-line parser for the ngram extraction program.

    Args:
        configfilename: Configuration filename. It is handed to
            ``load_config_for_command_line_help`` and is also the default
            value of the ``--config`` option.

    Returns:
        An ``argparse.ArgumentParser`` offering ``--config``, ``--language``,
        ``--corpus``, ``--datafolder`` and ``--maxwordtokens``.
    """
    # The helper returns four values; only the last one is used here (it is
    # appended to the parser description).
    _language, _corpus, _datafolder, configtext = \
        load_config_for_command_line_help(configfilename)

    parser = argparse.ArgumentParser(
        # Adjacent string literals are joined verbatim, so the space after
        # "other" is required to avoid "otherphonology-related".
        description="Extracting phone/letter ngrams from a wordlist; also other "
                    "phonology-related stuff\n\n{}"
                    .format(configtext),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("--config", help="configuration filename",
                        type=str, default=configfilename)

    parser.add_argument("--language", help="Language name",
                        type=str, default=None)
    parser.add_argument("--corpus", help="Corpus file to use",
                        type=str, default=None)
    parser.add_argument("--datafolder", help="path of the data folder",
                        type=str, default=None)

    parser.add_argument("--maxwordtokens", help="maximum number of word tokens;"
                        " if this is zero, then the program counts "
                        "all word tokens in the corpus",
                        type=int, default=0)
    return parser
Example #2
0
def makeArgParser(configfilename="config.json"):
    """Build the command-line parser for the word segmentation program.

    Args:
        configfilename: Configuration filename. It is handed to
            ``load_config_for_command_line_help`` and is also the default
            value of the ``--config`` option.

    Returns:
        An ``argparse.ArgumentParser`` offering ``--config``, ``--language``,
        ``--corpus``, ``--datafolder``, ``--cycles``, ``--candidates`` and
        ``--verbose``.
    """

    def _parse_bool(text):
        """Convert a command-line string such as "true" or "0" to a bool."""
        lowered = text.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1", "on"):
            return True
        # The empty string stays falsy, as it was under ``type=bool``.
        if lowered in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got {!r}".format(text))

    # The helper returns four values; only the last one is used here (it is
    # appended to the parser description).
    _language, _corpus, _datafolder, configtext = \
        load_config_for_command_line_help(configfilename)

    parser = argparse.ArgumentParser(
        description="Word segmentation program.\n\n{}"
                    .format(configtext),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("--config", help="configuration filename",
                        type=str, default=configfilename)

    parser.add_argument("--language", help="Language name",
                        type=str, default=None)
    parser.add_argument("--corpus", help="Corpus file to use",
                        type=str, default=None)
    parser.add_argument("--datafolder", help="path of the data folder",
                        type=str, default=None)

    parser.add_argument("--cycles", help="number of cycles",
                        type=int, default=200)
    # Adjacent literals are joined verbatim; the trailing space keeps the
    # help from reading "candidatesper iteration".
    parser.add_argument("--candidates", help="number of candidates "
                        "per iteration",
                        type=int, default=25)
    # ``type=bool`` would turn every non-empty string (including "False")
    # into True, so an explicit converter is used instead.
    parser.add_argument("--verbose", help="verbose output",
                        type=_parse_bool, default=False)

    return parser
Example #3
0
def makeArgParser(configfilename="config.json"):
    """Build the command-line parser for the word neighbors program.

    Args:
        configfilename: Configuration filename. It is handed to
            ``load_config_for_command_line_help`` and is also the default
            value of the ``--config`` option.

    Returns:
        An ``argparse.ArgumentParser`` offering the common ``--config``,
        ``--language``, ``--corpus`` and ``--datafolder`` options plus the
        neighbor-specific numeric and boolean options defined below.
    """

    def _parse_bool(text):
        """Convert a command-line string such as "true" or "0" to a bool."""
        lowered = text.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1", "on"):
            return True
        # The empty string stays falsy, as it was under ``type=bool``.
        if lowered in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(text))

    # The helper returns four values; only the last one is used here (it is
    # appended to the parser description).
    _language, _corpus, _datafolder, configtext = load_config_for_command_line_help(configfilename)

    parser = argparse.ArgumentParser(
        description="This program computes word neighbors.\n\n{}".format(configtext),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument("--config", help="configuration filename", type=str, default=configfilename)

    parser.add_argument("--language", help="Language name", type=str, default=None)
    parser.add_argument("--corpus", help="Corpus file to use", type=str, default=None)
    parser.add_argument("--datafolder", help="path of the data folder", type=str, default=None)

    parser.add_argument("--maxwordtypes", help="Number of word types to handle", type=int, default=1000)
    parser.add_argument("--nNeighbors", help="Number of neighbors", type=int, default=9)
    parser.add_argument("--nEigenvectors", help="Number of eigenvectors", type=int, default=11)

    parser.add_argument(
        "--mincontexts",
        help="Minimum number of times that "
        "a word occurs in a context; "
        "also minimum number of neighbors for a word that share "
        "a context (for WordToSharedContextsOfNeighbors)",
        type=int,
        default=3,
    )
    # ``type=bool`` would turn every non-empty string (including "False")
    # into True, which made it impossible to switch --usesigtransforms off;
    # an explicit converter is used instead.
    parser.add_argument("--wordtocontexts", help="create the WordToContexts dict?", type=_parse_bool, default=False)
    parser.add_argument("--contexttowords", help="create the ContextToWords dict?", type=_parse_bool, default=False)
    parser.add_argument("--usesigtransforms", help="use signature transforms?", type=_parse_bool, default=True)

    return parser
Example #4
0
def makeArgParser(configfilename="config.json"):
    """Create the argument parser for the word neighbor graph program.

    Args:
        configfilename: Configuration filename; passed to
            ``load_config_for_command_line_help`` and used as the default
            for ``--config``.

    Returns:
        An ``argparse.ArgumentParser`` with the ``--config``, ``--language``,
        ``--corpus`` and ``--datafolder`` options.
    """
    # Four values come back from the helper; only the last is needed.
    _language, _corpus, _datafolder, config_text = \
        load_config_for_command_line_help(configfilename)

    summary = "This program creates word neighbor graphs.\n\n{}"
    arg_parser = argparse.ArgumentParser(
        description=summary.format(config_text),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # (flag, help text, default) for each string-valued option, in the
    # order in which they are registered.
    string_options = (
        ("--config", "configuration filename", configfilename),
        ("--language", "Language name", None),
        ("--corpus", "Corpus file to use", None),
        ("--datafolder", "path of the data folder", None),
    )
    for flag, help_text, default_value in string_options:
        arg_parser.add_argument(flag, help=help_text, type=str,
                                default=default_value)

    return arg_parser
Example #5
0
def makeArgParser(configfilename="config.json"):
    """Create the argument parser for the morphological signature program.

    Args:
        configfilename: Configuration filename; passed to
            ``load_config_for_command_line_help`` and used as the default
            for ``--config``.

    Returns:
        An ``argparse.ArgumentParser`` with the string options ``--config``,
        ``--language``, ``--corpus``, ``--datafolder`` and the integer
        options ``--minstem``, ``--maxaffix``, ``--minsig`` and
        ``--maxwordtokens``.
    """
    # Four values come back from the helper; only the last is needed.
    _language, _corpus, _datafolder, config_text = \
        load_config_for_command_line_help(configfilename)

    summary = "This program computes morphological signatures.\n\n{}"
    arg_parser = argparse.ArgumentParser(
        description=summary.format(config_text),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # String-valued options: (flag, help text, default).
    string_options = (
        ("--config", "configuration filename", configfilename),
        ("--language", "Language name", None),
        ("--corpus", "Corpus file to use", None),
        ("--datafolder", "path of the data folder", None),
    )
    for flag, help_text, default_value in string_options:
        arg_parser.add_argument(flag, help=help_text, type=str,
                                default=default_value)

    # Integer-valued options: (flag, help text, default).
    integer_options = (
        ("--minstem",
         "Minimum stem length; "
         "usually from 2 to 5, where a smaller number means "
         "you can find shorter stems although the program "
         "may run a lot slower",
         4),
        ("--maxaffix",
         "Maximum affix length; "
         "usually from 1 to 5, where a larger number means "
         "you can find longer affixes",
         3),
        ("--minsig",
         "Minimum number of signature use; "
         "a small number like 5 is pretty much the smallest "
         "to use in order to filter spurious signatures; may "
         "try larger numbers like 10 or 20 and so forth",
         5),
        ("--maxwordtokens",
         "maximum number of word tokens;"
         " if this is zero, then the program counts "
         "all word tokens in the corpus",
         0),
    )
    for flag, help_text, default_value in integer_options:
        arg_parser.add_argument(flag, help=help_text, type=int,
                                default=default_value)

    return arg_parser
Example #6
0
def makeArgParser(configfilename="config.json"):
    """Create the argument parser for the trie computation program.

    Args:
        configfilename: Configuration filename; passed to
            ``load_config_for_command_line_help`` and used as the default
            for ``--config``.

    Returns:
        An ``argparse.ArgumentParser`` with the string options ``--config``,
        ``--language``, ``--corpus``, ``--datafolder`` and the integer
        options ``--minstem``, ``--minaffix``, ``--minsize`` and
        ``--maxwordtokens``.
    """
    # Four values come back from the helper; only the last is needed.
    _language, _corpus, _datafolder, config_text = \
        load_config_for_command_line_help(configfilename)

    summary = "The program computes tries given corpus data.\n\n{}"
    arg_parser = argparse.ArgumentParser(
        description=summary.format(config_text),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Every option as (flag, value type, default, help text), listed in
    # registration order.
    option_table = [
        ("--config", str, configfilename, "configuration filename"),
        ("--language", str, None, "Language name"),
        ("--corpus", str, None, "Corpus file to use"),
        ("--datafolder", str, None, "relative path of the data folder"),
        ("--minstem", int, 4,
         "Minimum stem length; "
         "usually from 2 to 5, where a smaller number means "
         "you can find shorter stems although the program "
         "may run a lot slower"),
        ("--minaffix", int, 1, "Minimum affix length"),
        ("--minsize", int, 3,
         "Minimum size of "
         "successors/predecessors for output"),
        ("--maxwordtokens", int, 0,
         "maximum number of word tokens;"
         " if this is zero, then the program counts "
         "all word tokens in the corpus"),
    ]
    for flag, value_type, default_value, help_text in option_table:
        arg_parser.add_argument(flag, help=help_text, type=value_type,
                                default=default_value)

    return arg_parser
Example #7
0
def makeArgParser(configfilename="config.json"):
    """Return an argument parser for the word neighbor graph program.

    Args:
        configfilename: Configuration filename; forwarded to
            ``load_config_for_command_line_help`` and used as the default
            of ``--config``.

    Returns:
        An ``argparse.ArgumentParser`` accepting ``--config``,
        ``--language``, ``--corpus`` and ``--datafolder``.
    """
    # Only the fourth returned value is used below.
    _lang, _corp, _folder, help_suffix = \
        load_config_for_command_line_help(configfilename)

    description = "This program creates word neighbor graphs.\n\n{}".format(
        help_suffix)
    result = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add_option = result.add_argument
    add_option("--config", help="configuration filename",
               type=str, default=configfilename)

    # The remaining options are all strings that default to None.
    unset_string_options = (
        ("--language", "Language name"),
        ("--corpus", "Corpus file to use"),
        ("--datafolder", "path of the data folder"),
    )
    for flag, help_text in unset_string_options:
        add_option(flag, help=help_text, type=str, default=None)

    return result
Example #8
0
def makeArgParser(configfilename="config.json"):
    """Build the command-line parser for the ngram extraction program.

    Args:
        configfilename: Configuration filename. It is handed to
            ``load_config_for_command_line_help`` and is also the default
            value of the ``--config`` option.

    Returns:
        An ``argparse.ArgumentParser`` offering ``--config``, ``--language``,
        ``--corpus``, ``--datafolder`` and ``--maxwordtokens``.
    """
    # The helper returns four values; only the last one is used here (it is
    # appended to the parser description).
    _language, _corpus, _datafolder, configtext = \
        load_config_for_command_line_help(configfilename)

    parser = argparse.ArgumentParser(
        # Adjacent string literals are joined verbatim, so the space after
        # "other" is required to avoid "otherphonology-related".
        description="Extracting phone/letter ngrams from a wordlist; also other "
        "phonology-related stuff\n\n{}".format(configtext),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("--config",
                        help="configuration filename",
                        type=str,
                        default=configfilename)

    parser.add_argument("--language",
                        help="Language name",
                        type=str,
                        default=None)
    parser.add_argument("--corpus",
                        help="Corpus file to use",
                        type=str,
                        default=None)
    parser.add_argument("--datafolder",
                        help="path of the data folder",
                        type=str,
                        default=None)

    parser.add_argument("--maxwordtokens",
                        help="maximum number of word tokens;"
                        " if this is zero, then the program counts "
                        "all word tokens in the corpus",
                        type=int,
                        default=0)
    return parser
Example #9
0
def makeArgParser(configfilename="config.json"):
    """Return an argument parser for the trie computation program.

    Args:
        configfilename: Configuration filename; forwarded to
            ``load_config_for_command_line_help`` and used as the default
            of ``--config``.

    Returns:
        An ``argparse.ArgumentParser`` accepting ``--config``,
        ``--language``, ``--corpus``, ``--datafolder``, ``--minstem``,
        ``--minaffix``, ``--minsize`` and ``--maxwordtokens``.
    """
    # Only the fourth returned value is used below.
    _lang, _corp, _folder, help_suffix = \
        load_config_for_command_line_help(configfilename)

    description = "The program computes tries given corpus data.\n\n{}".format(
        help_suffix)
    result = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Keyword arguments for each option, keyed by flag and listed in
    # registration order.
    option_specs = (
        ("--config", dict(help="configuration filename",
                          type=str, default=configfilename)),
        ("--language", dict(help="Language name",
                            type=str, default=None)),
        ("--corpus", dict(help="Corpus file to use",
                          type=str, default=None)),
        ("--datafolder", dict(help="relative path of the data folder",
                              type=str, default=None)),
        ("--minstem", dict(help="Minimum stem length; "
                                "usually from 2 to 5, where a smaller number means "
                                "you can find shorter stems although the program "
                                "may run a lot slower",
                           type=int, default=4)),
        ("--minaffix", dict(help="Minimum affix length",
                            type=int, default=1)),
        ("--minsize", dict(help="Minimum size of "
                                "successors/predecessors for output",
                           type=int, default=3)),
        ("--maxwordtokens", dict(help="maximum number of word tokens;"
                                      " if this is zero, then the program counts "
                                      "all word tokens in the corpus",
                                 type=int, default=0)),
    )
    for flag, keyword_args in option_specs:
        result.add_argument(flag, **keyword_args)

    return result
Example #10
0
def makeArgParser(configfilename="config.json"):
    """Build the command-line parser for the word neighbors program.

    Args:
        configfilename: Configuration filename. It is handed to
            ``load_config_for_command_line_help`` and is also the default
            value of the ``--config`` option.

    Returns:
        An ``argparse.ArgumentParser`` offering the common ``--config``,
        ``--language``, ``--corpus`` and ``--datafolder`` options plus the
        neighbor-specific numeric and boolean options defined below.
    """

    def _parse_bool(text):
        """Convert a command-line string such as "true" or "0" to a bool."""
        lowered = text.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1", "on"):
            return True
        # The empty string stays falsy, as it was under ``type=bool``.
        if lowered in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got {!r}".format(text))

    # The helper returns four values; only the last one is used here (it is
    # appended to the parser description).
    _language, _corpus, _datafolder, configtext = \
        load_config_for_command_line_help(configfilename)

    parser = argparse.ArgumentParser(
        description="This program computes word neighbors.\n\n{}".format(
            configtext),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("--config",
                        help="configuration filename",
                        type=str,
                        default=configfilename)

    parser.add_argument("--language",
                        help="Language name",
                        type=str,
                        default=None)
    parser.add_argument("--corpus",
                        help="Corpus file to use",
                        type=str,
                        default=None)
    parser.add_argument("--datafolder",
                        help="path of the data folder",
                        type=str,
                        default=None)

    parser.add_argument("--maxwordtypes",
                        help="Number of word types to handle",
                        type=int,
                        default=1000)
    parser.add_argument("--nNeighbors",
                        help="Number of neighbors",
                        type=int,
                        default=9)
    parser.add_argument("--nEigenvectors",
                        help="Number of eigenvectors",
                        type=int,
                        default=11)

    parser.add_argument(
        "--mincontexts",
        help="Minimum number of times that "
        "a word occurs in a context; "
        "also minimum number of neighbors for a word that share "
        "a context (for WordToSharedContextsOfNeighbors)",
        type=int,
        default=3)
    # ``type=bool`` would turn every non-empty string (including "False")
    # into True, which made it impossible to switch --usesigtransforms off;
    # an explicit converter is used instead.
    parser.add_argument("--wordtocontexts",
                        help="create the WordToContexts dict?",
                        type=_parse_bool,
                        default=False)
    parser.add_argument("--contexttowords",
                        help="create the ContextToWords dict?",
                        type=_parse_bool,
                        default=False)
    parser.add_argument("--usesigtransforms",
                        help="use signature transforms?",
                        type=_parse_bool,
                        default=True)

    return parser