コード例 #1
0
            if p == -1:
                # Unweighted (weight=1) attribute.
                item.append(crfsuite.Attribute(field))
            else:
                # Weighted attribute
                item.append(crfsuite.Attribute(field[:p],
                                               float(field[p + 1:])))

        # Append the item to the item sequence.
        xseq.append(item)
        # Append the label to the label sequence.
        yseq.append(fields[0])

# Script entry point: print the CRFsuite version, load training sequences
# from standard input, select the training algorithm, and list its parameters.
if __name__ == '__main__':
    # This demonstrates how to obtain the version string of CRFsuite.
    print crfsuite.version()

    # Create a Trainer object.
    trainer = Trainer()

    # Read (xseq, yseq) pairs from STDIN and append each one to the trainer.
    # NOTE(review): the third argument (0) is presumably the group number
    # assigned to the instance -- confirm against the CRFsuite API.
    for xseq, yseq in instances(sys.stdin):
        trainer.append(xseq, yseq, 0)

    # Use L2-regularized SGD and 1st-order dyad features.
    trainer.select('l2sgd', 'crf1d')

    # This demonstrates how to list parameters and obtain their values;
    # the result of trainer.help(name) is printed on the same line.
    for name in trainer.params():
        print name, trainer.get(name), trainer.help(name)
コード例 #2
0
ファイル: sample_train.py プロジェクト: bratao/CRFSuite
    parser.add_argument("--help-params", help = "output CRFSuite parameters")
    parser.add_argument("-a", "--algorithm", help = "type of graphical model to use", nargs = 1, \
                        type = str, default = "lbfgs", choices = ("lbfgs", "l2sgd", "ap", "pa", \
                                                                  "arow"))
    parser.add_argument("-m", "--model", help = "model in which to store the file", type = str, \
                        default = "")
    parser.add_argument("-t", "--type", help = "type of graphical model to use", \
                        type = str, default = "1d", choices = ("1d", "tree", "semim"))
    parser.add_argument("-v", "--version", help = "output CRFSuite version")
    parser.add_argument("files", help="input files", nargs = '*', type = argparse.FileType('r'),
                        default = [sys.stdin])
    args = parser.parse_args()

    # This demonstrates how to obtain the version string of CRFsuite.
    if args.version:
        print(crfsuite.version())
    elif args.help_params:
        for name in trainer.params():
            print(' '.join([name, trainer.get(name), trainer.help(name)]))
    else:
        if args.model:
            mdir = os.path.dirname(args.model)
            if mdir == "":
                pass
            elif os.path.exists(mdir):
                if not os.path.isdir(mdir) or not os.access(mdir, os.R_OK):
                    print("Can't write to directory '{:s}'.".format(mdir), file = sys.stderr)
            else:
                os.makedirs(mdir)
        # Create a Trainer object.
        trainer = Trainer()
コード例 #3
0
ファイル: crf.py プロジェクト: Balkanlii/nlp
                else:
                    item.append(crfsuite.Attribute(feature))
            items.append(item)

        return self.tag(items)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train CRFSuite using Python.')
    parser.add_argument('--train')
    parser.add_argument('--tag')
    parser.add_argument('--model', default='crf.model')
    opts = parser.parse_args()

    # This demonstrates how to obtain the version string of CRFsuite.
    print 'CRFSuite v%s' % crfsuite.version()

    if opts.train:
        # Create a Trainer object.
        trainer = Trainer()

        # Read training instances from the --train file and append them to the trainer.
        with open(opts.train) as lines:
            for data, labels in read_svm_format(lines):
                trainer.append(data, labels, 0)

        trainer.save(opts.model)
    else:
        tagger = Tagger(opts.model)
        with open(opts.tag) as lines:
            for data, gold_labels in read_svm_format(lines):
コード例 #4
0
                    item.append(crfsuite.Attribute(feature))
            items.append(item)

        return self.tag(items)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Train CRFSuite using Python.')
    parser.add_argument('--train')
    parser.add_argument('--tag')
    parser.add_argument('--model', default='crf.model')
    opts = parser.parse_args()

    # This demonstrates how to obtain the version string of CRFsuite.
    print 'CRFSuite v%s' % crfsuite.version()

    if opts.train:
        # Create a Trainer object.
        trainer = Trainer()

        # Read training instances from the --train file and append them to the trainer.
        with open(opts.train) as lines:
            for data, labels in read_svm_format(lines):
                trainer.append(data, labels, 0)

        trainer.save(opts.model)
    else:
        tagger = Tagger(opts.model)
        with open(opts.tag) as lines:
            for data, gold_labels in read_svm_format(lines):
コード例 #5
0
ファイル: sample_train.py プロジェクト: pprett/crfsuite
            yseq = crfsuite.StringList()
            continue

        fields = line.split("\t")
        item = crfsuite.Item()
        for field in fields[1:]:
            p = field.rfind(":")
            if p == -1:
                item.append(crfsuite.Attribute(field))
            else:
                item.append(crfsuite.Attribute(field[:p], float(field[p + 1 :])))
        xseq.append(item)
        yseq.append(fields[0])


# Script entry point: print the CRFsuite version, load training sequences
# from standard input, print the trainer's parameters, and run training.
if __name__ == "__main__":
    fi = sys.stdin
    # NOTE(review): fo is not used anywhere in this block -- confirm whether
    # other code in the file reads it before removing.
    fo = sys.stdout

    # Print the version string reported by the crfsuite module.
    print crfsuite.version()

    # Append every (xseq, yseq) pair read from fi to a new Trainer.
    # NOTE(review): the third argument (0) is presumably the group number
    # assigned to the instance -- confirm against the CRFsuite API.
    trainer = Trainer()
    for xseq, yseq in instances(fi):
        trainer.append(xseq, yseq, 0)

    # NOTE(review): the two strings presumably name the training algorithm
    # and the graphical model type -- confirm against the CRFsuite API.
    trainer.select("l2sgd", "crf1d")
    # Print each parameter name with the results of get() and help() for it,
    # then print the value of the "c2" parameter on its own line.
    for name in trainer.params():
        print name, trainer.get(name), trainer.help(name)
    print trainer.get("c2")
    # sys.argv[1] is read without a length check, so running the script with
    # no command-line argument raises IndexError here.
    # NOTE(review): the first argument is presumably the output model path and
    # -1 presumably disables holdout evaluation -- confirm.
    trainer.train(sys.argv[1], -1)
コード例 #6
0
                        default="1d",
                        choices=("1d", "tree", "semim"))
    parser.add_argument("-v",
                        "--version",
                        help="output CRFSuite version",
                        action="store_true")
    parser.add_argument("files",
                        help="input files",
                        nargs='*',
                        type=argparse.FileType('r'),
                        default=[sys.stdin])
    args = parser.parse_args()

    # This demonstrates how to obtain the version string of CRFsuite.
    if args.version:
        print(crfsuite.version())
        sys.exit(0)
    # Create a Trainer object.
    trainer = Trainer()
    # Select the algorithm and model type given by --algorithm and --type; fail if the trainer rejects them.
    if not trainer.select(str(args.algorithm), str(args.type)):
        raise Exception("Could not initialize trainer.")

    if args.help_params:
        for name in trainer.params():
            print(' '.join([name, trainer.get(name), trainer.help(name)]))
    else:
        if args.model:
            mdir = os.path.dirname(args.model)
            if mdir == "":
                pass