Esempio n. 1
0
        'Print deprels. The option can be "UD", "langspec", or "UD+langspec".')
    # --catvals selects which category=value pairs get printed; it stays
    # None when the flag is not given.
    opt_parser.add_argument(
        '--catvals',
        default=None,
        help=
        'Print category=value pairs. The option can be "UD", "langspec", or "UD+langspec". This distinction is based on the feature, not the value.'
    )
    # --sort picks the ordering of the printed values. argparse expands
    # %(default)s in the help text to the default value ('freq').
    opt_parser.add_argument(
        '--sort',
        default='freq',
        help=
        'Sort the values by their frequency (freq) or alphabetically (alph). Default: %(default)s.'
    )
    args = opt_parser.parse_args()  #Parsed command-line arguments
    # args.output is set here in code (not taken from the command line) before
    # args is handed to file_util.in_out.
    # NOTE(review): "-" presumably selects standard output -- confirm in file_util.
    args.output = "-"
    inp, out = file_util.in_out(args, multiple_files=True)
    trees = file_util.trees(inp)

    # Accumulate statistics over every row of every tree.
    stats = Stats()
    try:
        # Each item of trees unpacks into (comments, tree); comments is not
        # used in this loop.
        for comments, tree in trees:
            stats.tree_count += 1
            for cols in tree:
                stats.count_cols(cols)
    except:
        # Best-effort handling: the traceback and a warning go to stderr and
        # execution continues with whatever has been counted so far. Being a
        # bare except, this also intercepts KeyboardInterrupt and SystemExit.
        traceback.print_exc()
        # Python 2 print-chevron syntax: writes the message to sys.stderr.
        print >> sys.stderr, "\n\n ------- STATS MAY BE EMPTY OR INCOMPLETE ----------"
        pass
    if args.stats:
        stats.print_basic_stats(out)
    if args.jsonstats:
Esempio n. 2
0
    # Quiet mode also switches off echoing of the input.
    if args.quiet:
        args.echo_input = False

    # Tag sets for every column that needs to be checked. Each entry starts
    # out as None; per the warnings below, a None entry means the
    # corresponding values will not be checked.
    tagsets = dict.fromkeys((POSTAG, CPOSTAG, FEATS, DEPREL, DEPS))

    # Tag sets are only loaded when a language was given.
    if args.lang:
        # Dependency relations: the UD list plus the language-specific list.
        tagsets[DEPREL] = load_set(
            "deprel.ud",
            "deprel." + args.lang,
            validate_langspec=True,
        )
        if tagsets[DEPREL] is None:
            warn(
                u"The language-specific file data/deprel.%s could not be found. Dependency relations will not be checked.\nPlease add the language-specific dependency relations using python conllu-stats.py --deprels=langspec yourdata/*.conllu > data/deprel.%s\n Also please check that file for errorneous relations. It's okay if the file is empty, but it must exist.\n\n" % (args.lang, args.lang),
                "Language specific data missing",
                lineno=False,
            )
        # The DEPS column is checked against the same set as DEPREL.
        tagsets[DEPS] = tagsets[DEPREL]

        # Feature=value pairs: the UD list plus the language-specific list.
        tagsets[FEATS] = load_set("feat_val.ud", "feat_val." + args.lang)
        if tagsets[FEATS] is None:
            warn(
                u"The language-specific file data/feat_val.%s could not be found. Feature=value pairs will not be checked.\nPlease add the language-specific pairs using python conllu-stats.py --catvals=langspec yourdata/*.conllu > data/feat_val.%s It's okay if the file is empty, but it must exist.\n \n\n" % (args.lang, args.lang),
                "Language specific data missing",
                lineno=False,
            )

        # CPOSTAG has no language-specific list (second argument is None).
        tagsets[CPOSTAG] = load_set("cpos.ud", None)

    inp, out = file_util.in_out(args)

    validate(inp, out, args, tagsets)

    # The exit status reflects whether any error was counted during
    # validation; the summary on stderr is suppressed in quiet mode.
    if error_counter:
        if not args.quiet:
            total_errors = sum(count for _kind, count in error_counter.iteritems())
            print >> sys.stderr, "*** FAILED *** with %d errors" % total_errors
            # One line per error type, in sorted order.
            for kind, count in sorted(error_counter.items()):
                print >> sys.stderr, kind, "errors:", count
        sys.exit(1)
    else:
        if not args.quiet:
            print >> sys.stderr, "*** PASSED ***"
        sys.exit(0)
    
Esempio n. 3
0
    print("Accuracy: ", numTruetag / numWords)


# Command-line interface: two optional positionals naming the input and the
# output.
# NOTE(review): the description says "validation script", but the code below
# builds a corpus/model and tags a sentence -- the text looks copied from
# another tool; confirm before changing it.
opt_parser = argparse.ArgumentParser(description="CoNLL-U validation script")

# Register the positionals on the group itself. Adding them to opt_parser
# directly leaves the group empty, so its title never shows up in --help.
io_group = opt_parser.add_argument_group("Input / output options")
io_group.add_argument(
    'input',
    nargs='?',
    help='Input file name, or "-" or nothing for standard input.')
io_group.add_argument(
    'output',
    nargs='?',
    help='Output file name, or "-" or nothing for standard output.')
args = opt_parser.parse_args()  # Parsed command-line arguments
inp, out = file_util.in_out(args)
# Read all trees into a list, so makeCorpus receives a list rather than
# whatever iterable file_util.trees returns.
trees = list(file_util.trees(inp))

makeCorpus(trees)
makeModel(100)
# Optional accuracy evaluations, currently disabled:
# getAccuracyBestTag(25)
# getAccuracyBestSeqTag(25)

# Demo: print a fixed sample sentence, then the result of tagSentence on its
# whitespace-separated tokens.
print(
    "Tag kalimat: Ahli rekayasa optik mendesain komponen dari instrumen optik seperti lensa."
)
print(
    "Tag menggunakan best tag: ",
    tagSentence((
        " Ahli rekayasa optik mendesain komponen dari instrumen optik seperti lensa"
    ).split()))
Esempio n. 4
0
            if not cat==u"CPOSTAG" and ((u"UD" in which and cat in ud_cats) or (u"langspec" in which and cat not in ud_cats)):
                print >> out, cat_is_val
        
        

if __name__ == "__main__":
    # Command-line interface. The options are registered in the same order as
    # before, so the --help listing is unchanged.
    opt_parser = argparse.ArgumentParser(
        description='Script for basic stats generation. Assumes a validated input.')
    # NOTE(review): nargs='+' requires at least one input name, although the
    # help text mentions "nothing" for standard input -- confirm the intent.
    opt_parser.add_argument(
        'input',
        nargs='+',
        help='Input file name (can be several files), or "-" or nothing for standard input.')

    # Boolean switches, False unless given on the command line.
    for switch, switch_help in (
            ('--stats', 'Print basic stats'),
            ('--jsonstats', 'Print basic stats as json dictionary'),
    ):
        opt_parser.add_argument(switch, action='store_true', default=False, help=switch_help)

    # Value-taking selectors, None unless given on the command line.
    for selector, selector_help in (
            ('--deprels', 'Print deprels. The option can be "UD", "langspec", or "UD+langspec".'),
            ('--catvals', 'Print category=value pairs. The option can be "UD", "langspec", or "UD+langspec". This distinction is based on the feature, not the value.'),
    ):
        opt_parser.add_argument(selector, default=None, help=selector_help)

    opt_parser.add_argument(
        '--sort',
        default='freq',
        help='Sort the values by their frequency (freq) or alphabetically (alph). Default: %(default)s.')

    args = opt_parser.parse_args()
    # The output name is set in code to "-" before args goes to file_util.in_out.
    args.output = "-"
    inp, out = file_util.in_out(args, multiple_files=True)
    trees = file_util.trees(inp)

    # Count every row of every tree; each item of trees unpacks into
    # (comments, tree) and comments is not used here.
    stats = Stats()
    try:
        for comments, tree in trees:
            stats.tree_count += 1
            for cols in tree:
                stats.count_cols(cols)
    except:
        # Best-effort: report the failure on stderr and carry on with the
        # counts gathered so far. The bare except is kept on purpose so that
        # an interrupted run still reaches the reporting code below.
        traceback.print_exc()
        print >> sys.stderr, "\n\n ------- STATS MAY BE EMPTY OR INCOMPLETE ----------"

    if args.stats:
        stats.print_basic_stats(out)
    if args.jsonstats: