Beispiel #1
0
import preprocess
from sklearn.externals import joblib

# Label -> integer code; codes follow the order of the tuple below.
# NOTE(review): O/B/I look like IOB chunk labels (preprocess.iob is used by
# this script) - confirm.
tags = {
    label: code
    for code, label in enumerate(("UNKNOWN", "O", "B", "I", "START"))
}
# Integer code -> label. "START" deliberately has no reverse entry.
reverse_tags = {
    code: label for label, code in tags.items() if label != "START"
}
# Placeholders; both are assigned in the __main__ section of this script.
word_dict = numFeatures = None

if __name__ == "__main__":
  # Positional command-line arguments: train path, test path, model number.
  train = sys.argv[1]
  test = sys.argv[2]
  model_number = sys.argv[3]
  X = None
  y = None

  # preprocess.iob yields three values; the first is opened as a file below
  # and the length of the second is the number of distinct words.
  retagged_train, word_dict, totalNumWords = preprocess.iob(train)
  # No `global numFeatures` here: this code already runs at module scope, so
  # the assignment below rebinds the module-level name directly. Declaring it
  # global after the earlier module-level `numFeatures = None` is a
  # SyntaxError on Python 3.6+.
  numDistinctWords = len(word_dict)
  # NOTE(review): the constants 1 + 9 + 12 + 2 presumably count extra
  # non-word feature columns - confirm against the feature-filling code.
  numFeatures = numDistinctWords + 1 + 9 + 12 + 2
  # One row per word (totalNumWords), one column per feature.
  X = scipy.sparse.lil_matrix((totalNumWords, numFeatures))
  y = np.empty([totalNumWords, ])
  #training_seen = [0] * totalNumWords
  with open(retagged_train) as f:
    # Initial values for the per-line parsing state.
    # NOTE(review): presumably consumed by the reading loop that follows.
    identifier_line = True
    split_line = None
    word = None
    tag = None
    length = None
    alpha = None
    allCaps = None
    hasNumeric = None
Beispiel #2
0
            deltaO[index] = scoreI, "I", counts, phi, wordIndex
        if tag == "B":
            deltaB[index] = scoreI, "I", counts, phi, wordIndex
        if tag == "I":
            deltaI[index] = scoreI, "I", counts, phi, wordIndex
        return scoreI, "I", counts, phi, wordIndex


if __name__ == "__main__":
    # Positional command-line arguments: train path, test path, model number.
    train = sys.argv[1]
    test = sys.argv[2]
    model_number = sys.argv[3]
    X = None
    y = None

    # preprocess.iob yields three values; the first is opened as a file below
    # and the length of the second is the number of distinct words.
    retagged_train, word_dict, totalNumWords = preprocess.iob(train)
    # No `global numFeatures` here: this code already runs at module scope,
    # so the assignment below binds the module-level name directly. A
    # module-scope `global` is a no-op, and a SyntaxError on Python 3.6+ when
    # the name was assigned earlier in the module.
    numDistinctWords = len(word_dict)
    # NOTE(review): the constants 1 + 6 + 8 + 1 presumably count extra
    # non-word feature columns - confirm against the feature-filling code.
    numFeatures = numDistinctWords + 1 + 6 + 8 + 1
    # One row per word (totalNumWords), one column per feature.
    X = scipy.sparse.lil_matrix((totalNumWords, numFeatures))
    y = np.empty([
        totalNumWords,
    ])
    with open(retagged_train) as f:
        # Initial values for the per-line parsing state.
        # NOTE(review): presumably consumed by the reading loop that follows.
        identifier_line = True
        split_line = None
        word = None
        tag = None
        length = None
        alpha = None
        allCaps = None
Beispiel #3
0
import preprocess
from sklearn.externals import joblib

# Label -> integer code; codes follow the order of the tuple below.
# NOTE(review): O/B/I look like IOB chunk labels (preprocess.iob is used by
# this script) - confirm.
tags = {
    label: code for code, label in enumerate(("UNKNOWN", "O", "B", "I"))
}
# Integer code -> label: the exact inverse of `tags`.
reverse_tags = {code: label for label, code in tags.items()}
# Placeholders; both are assigned in the __main__ section of this script.
word_dict = numFeatures = None

if __name__ == "__main__":
    # Positional command-line arguments: train path, test path, model number.
    train = sys.argv[1]
    test = sys.argv[2]
    model_number = sys.argv[3]
    X = None
    y = None

    # preprocess.iob yields two values; the first is opened as a file below
    # and the length of the second sizes the matrices.
    retagged_train, word_dict = preprocess.iob(train)
    # No `global numFeatures` here: this code already runs at module scope,
    # so the assignment below rebinds the module-level name directly.
    # Declaring it global after the earlier module-level
    # `numFeatures = None` is a SyntaxError on Python 3.6+.
    numWords = len(word_dict)
    # NOTE(review): the constants 3 + 4 presumably count extra non-word
    # feature columns - confirm against the feature-filling code.
    numFeatures = numWords + 3 + 4
    # NOTE(review): rows are sized by len(word_dict); confirm that this is
    # the intended number of training samples.
    X = scipy.sparse.lil_matrix((numWords, numFeatures))
    y = np.empty([
        numWords,
    ])
    training_seen = [0] * numWords
    with open(retagged_train) as f:
        # Initial values for the per-line parsing state.
        # NOTE(review): presumably consumed by the reading loop that follows.
        identifier_line = True
        split_line = None
        word = None
        tag = None
        length = None
        alpha = None
Beispiel #4
0
import preprocess
from sklearn.externals import joblib

# Label -> integer code; codes follow the order of the tuple below.
# NOTE(review): O/B/I look like IOB chunk labels (preprocess.iob is used by
# this script) - confirm.
tags = {
    label: code for code, label in enumerate(("UNKNOWN", "O", "B", "I"))
}
# Integer code -> label: the exact inverse of `tags`.
reverse_tags = {code: label for label, code in tags.items()}
# Placeholders; both are assigned in the __main__ section of this script.
word_dict = numFeatures = None

if __name__ == "__main__":
  # Positional command-line arguments: train path, test path, model number.
  train = sys.argv[1]
  test = sys.argv[2]
  model_number = sys.argv[3]
  X = None
  y = None

  # preprocess.iob yields two values; the first is opened as a file below
  # and the length of the second sizes the matrices.
  retagged_train, word_dict = preprocess.iob(train)
  # No `global numFeatures` here: this code already runs at module scope, so
  # the assignment below rebinds the module-level name directly. Declaring it
  # global after the earlier module-level `numFeatures = None` is a
  # SyntaxError on Python 3.6+.
  numWords = len(word_dict)
  # NOTE(review): the constants 3 + 4 presumably count extra non-word
  # feature columns - confirm against the feature-filling code.
  numFeatures = numWords + 3 + 4
  # NOTE(review): rows are sized by len(word_dict); confirm that this is the
  # intended number of training samples.
  X = scipy.sparse.lil_matrix((numWords, numFeatures))
  y = np.empty([numWords, ])
  training_seen = [0] * numWords
  with open(retagged_train) as f:
    # Initial values for the per-line parsing state.
    # NOTE(review): presumably consumed by the reading loop that follows.
    identifier_line = True
    split_line = None
    word = None
    tag = None
    length = None
    alpha = None
    allCaps = None
    hasNumeric = None