예제 #1
0
def simpleSentiment(trainingset, devin, devout):
    """Tag every dev token with its best emission tag and write the result.

    The training file is parsed with ``parseFile``; items 0 and 1 of its
    result are passed to ``calculateEmission`` together with the constant 3
    (presumably a smoothing / rare-word threshold -- confirm against
    ``calculateEmission``).  The returned table is used as a mapping from
    token to a dict of per-tag scores.

    For each entry produced by ``parseFileInput(devin)``:

    * ``None`` entries are tagged ``"#SPACE#"`` (presumably they mark
      sentence boundaries -- confirm against ``parseFileInput``);
    * tokens present in the table get the tag with the highest score;
    * all other tokens fall back to the table's ``'#UNK#'`` row.

    Args:
        trainingset: Path of the training file.
        devin: Path of the input file to tag.
        devout: Path the tagged output is written to.  Earlier revisions
            ignored this argument and wrote to a hard-coded absolute path
            on the author's machine.

    Raises:
        KeyError: If an unseen token is encountered and the emission table
            has no ``'#UNK#'`` row.
    """
    temp = parseFile(trainingset)
    tokens = temp[0]
    tags = temp[1]

    print("Training started.")
    emissionTable = calculateEmission(tokens, tags, 3)
    print("Training Completed.\n")

    inputTokens = parseFileInput(devin)
    pprint(inputTokens)

    inputTags = []
    for token in inputTokens:
        if token is None:
            inputTags.append("#SPACE#")
            continue

        # Look up '#UNK#' lazily so a table without that row only fails
        # when an unseen token actually needs it.
        if token in emissionTable:
            scores = emissionTable[token]
        else:
            scores = emissionTable['#UNK#']
        inputTags.append(max(scores, key=scores.get))

    pprint(inputTags)
    # Honour the caller-supplied destination instead of a machine-specific path.
    writeout(inputTokens, inputTags, devout)
예제 #2
0
def simpleSentimentAnalysis(train, devin, devout):
    """Run the emission-only tagger on ``devin`` and write the tags to ``devout``.

    Steps:
        1. Parse ``train`` with ``parseTrainFile``; item 0 of the result is
           treated as the tokens and item 1 as the tags.
        2. Estimate emission parameters with
           ``calculateEmission(tags, tokens, 3)``.
        3. Parse ``devin`` with ``parseFileInput`` and predict tags with
           ``simple``.
        4. Hand tokens, predicted tags and ``devout`` to ``writeout``.

    Args:
        train: Path of the training file.
        devin: Path of the input file to tag.
        devout: Destination passed through to ``writeout``.
    """
    parsed = parseTrainFile(train)
    trainTokens = parsed[0]
    trainTags = parsed[1]

    emissions = calculateEmission(trainTags, trainTokens, 3)

    devTokens = parseFileInput(devin)
    writeout(devTokens, simple(devTokens, emissions), devout)
예제 #3
0
def maxMarginalSentimentAnalysis(train, devin, devout):
    """Tag ``devin`` tweet-by-tweet with ``maxMarginal`` and write to ``devout``.

    Emission and transition parameters are estimated from the parsed
    training data.  Each tweet returned by ``parseMMInput(devin)`` is decoded
    separately; the per-tweet tags are flattened into one list with a
    ``None`` appended after every tweet.  That flat list is written out
    alongside the tokens from ``parseFileInput(devin)`` (presumably the same
    file in flat form, with matching ``None`` separators -- confirm against
    the parsers).

    Args:
        train: Path of the training file.
        devin: Path of the input file to tag.
        devout: Destination passed through to ``writeout``.
    """
    parsed = parseTrainFile(train)
    trainTokens = parsed[0]
    trainTags = parsed[1]

    emissions = calculateEmission(trainTags, trainTokens, 3)
    transitions = calculateTransition(trainTags)

    flatTags = []
    for sentence in parseMMInput(devin):
        flatTags.extend(maxMarginal(sentence, transitions, emissions))
        # None marks the end of the tweet in the flattened tag stream.
        flatTags.append(None)

    writeout(parseFileInput(devin), flatTags, devout)
예제 #4
0
def _decodeTweets(tweets, transitionParams, emissionParams):
    """Decode each tweet with ``maxMarginal``; return the tags flattened, with ``None`` after every tweet."""
    flatTags = []
    for tweet in tweets:
        flatTags.extend(maxMarginal(tweet, transitionParams, emissionParams))
        flatTags.append(None)
    return flatTags


def improvedMaxMarginalSentimentAnalysis(train, devin, devout):
    """
    Improve the parsing of train data and input data and update model parameters (Refer to report)
    Using max-marginal decoding algorithm to predict the most likely tag sequence for the given tokens

    The model is updated with one round of self-training:

    1. Estimate emission/transition parameters from the training data.
    2. Decode the input tweets and append them, with their predicted tags,
       to the training tokens/tags (a ``None`` separator follows each tweet
       in both lists).
    3. Re-estimate the parameters from the augmented data and decode the
       input tweets again; this final prediction is what gets written.

    Earlier revisions performed steps 1-2 but ran the final decode with the
    parameters estimated *before* the augmentation, so the self-training
    step had no influence on the output.

    Args:
        train: Path of the training file (parsed with ``parseTrainP5``).
        devin: Path of the input file to tag (parsed with ``parseInputP5``
            for decoding and ``parseFileInput`` for writing).
        devout: Destination passed through to ``writeout``.
    """
    traindata = parseTrainP5(train)
    tokens = traindata[0]
    tags = traindata[1]

    inputTweets = parseInputP5(devin)

    # A single self-training round.  Each round appends the input tweets
    # again, so raising the count also re-weights them in the estimates.
    for _ in range(1):
        emissionParams = calculateEmission(tags, tokens, 3)
        transitionParams = calculateTransition(tags)
        pseudoTags = _decodeTweets(inputTweets, transitionParams,
                                   emissionParams)

        # Keep tokens and tags aligned: words + None per tweet on one side,
        # predicted tags + None per tweet on the other (assumes maxMarginal
        # yields one tag per word -- confirm against its definition).
        for tweet in inputTweets:
            tokens.extend(tweet)
            tokens.append(None)
        tags.extend(pseudoTags)

    # Re-estimate from the augmented data so the final decode actually
    # benefits from the pseudo-labelled tweets.
    emissionParams = calculateEmission(tags, tokens, 3)
    transitionParams = calculateTransition(tags)
    predictedTags = _decodeTweets(inputTweets, transitionParams,
                                  emissionParams)

    writeout(parseFileInput(devin), predictedTags, devout)