Example #1
    def generate_data(self, count, offset):
        """
        Generates training data in the CRF++ format for the ingredient
        tagging task
        """
        df = pd.read_csv(self.opts.data_path)
        df = df.fillna("")

        # take a slice of `count` labelled rows starting at `offset`
        start = int(offset)
        end = int(offset) + int(count)

        df_slice = df.iloc[start:end]

        for index, row in df_slice.iterrows():
            try:
                # extract the display name
                display_input = utils.cleanUnicodeFractions(row["input"])
                tokens = utils.tokenize(display_input)
                del row["input"]

                # match each token against the row's labelled fields and prefix the tags
                rowData = self.addPrefixes([(t, self.matchUp(t, row)) for t in tokens])

                # one line per token: the token, its feature columns, then the best tag
                for i, (token, tags) in enumerate(rowData):
                    features = utils.getFeatures(token, i + 1, tokens)
                    print(utils.joinLine([token] + features + [self.bestTag(tags)]))

            # ToDo: deal with this
            except UnicodeDecodeError:
                pass

            # a blank line separates token sequences in the CRF++ format
            print()
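For reference, CRF++ training data is one token per line, with the token, its feature columns, and the label as the last column, whitespace-separated, and a blank line between token sequences. Below is a minimal sketch of that layout; the tokens, feature names, and tags are invented for illustration, since the real values come from utils.getFeatures and bestTag above.

# Minimal sketch of the CRF++ training layout emitted by generate_data.
# Tokens, features, and tags below are invented for illustration only.
rows = [
    ("1",     ["I1", "L8", "NoCAP", "NoPAREN"], "B-QTY"),
    ("cup",   ["I2", "L8", "NoCAP", "NoPAREN"], "B-UNIT"),
    ("sugar", ["I3", "L8", "NoCAP", "NoPAREN"], "B-NAME"),
]

for token, features, tag in rows:
    # one tab-separated line per token: token, feature columns, label last
    print("\t".join([token] + features + [tag]))
print()  # blank line terminates the sequence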
#!/usr/bin/env python

import sys
import os
import re

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from training import utils

if len(sys.argv) < 2:
    sys.stderr.write('Usage: parse-ingredients.py FILENAME\n')
    sys.exit(1)

FILENAME = str(sys.argv[1])
tmpFile = FILENAME + ".tmp"

# write one CRF++ test line per token (no label column), with a blank
# line between ingredient phrases
with open(FILENAME) as infile, open(tmpFile, 'w') as outfile:
    for line in infile:
        # strip any HTML tags before tokenizing
        line_clean = re.sub('<[^<]+?>', '', line)
        tokens = utils.tokenize(line_clean)

        for i, token in enumerate(tokens):
            features = utils.getFeatures(token, i + 1, tokens)
            outfile.write(utils.joinLine([token] + features) + "\n")
        outfile.write("\n")

# run the trained model over the temp file, then clean up
tmpFilePath = "../../tmp/model_file"
modelFilename = os.path.join(os.path.dirname(__file__), tmpFilePath)
os.system("crf_test -v 1 -m %s %s" % (modelFilename, tmpFile))
os.system("rm %s" % tmpFile)
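The two os.system calls at the end could also be written with subprocess and os.remove, which avoids a shell and keeps the temp-file cleanup portable. A sketch, assuming Python 3 and that the crf_test binary is on PATH, reusing modelFilename and tmpFile from the script above:

import subprocess

# same crf_test flags as above, run without a shell,
# followed by a portable removal of the temporary file
subprocess.run(["crf_test", "-v", "1", "-m", modelFilename, tmpFile], check=True)
os.remove(tmpFile)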