def simpleSentiment(trainingset, devin, devout):
    """
    Tag every input token with its highest-scoring entry in the emission
    table and write the token/tag pairs to `devout`.

    Args:
        trainingset: Passed to `parseFile`; the first two items of its result
            are used as the training tokens and tags.
        devin: Passed to `parseFileInput` to obtain the tokens to tag. A
            `None` entry in that sequence is tagged "#SPACE#".
        devout: Output destination handed to `writeout`.

    Tokens missing from the emission table fall back to the table's '#UNK#'
    entry. Progress messages and the token/tag lists are printed to stdout.
    """
    temp = parseFile(trainingset)
    tokens = temp[0]
    tags = temp[1]

    print("Training started.")
    # NOTE(review): the argument order here is (tokens, tags); the other
    # drivers in this file call calculateEmission(tags, tokens, 3) -- confirm
    # which order calculateEmission actually expects.
    emissionTable = calculateEmission(tokens, tags, 3)
    print("Training Completed.\n")

    inputTokens = parseFileInput(devin)
    pprint(inputTokens)

    inputTags = []
    for token in inputTokens:
        if token is None:
            inputTags.append("#SPACE#")
            continue
        # Unseen tokens share the '#UNK#' row; the lookup stays lazy so the
        # row is only required when an unseen token actually occurs.
        lookupKey = token if token in emissionTable else '#UNK#'
        candidates = emissionTable[lookupKey]
        inputTags.append(max(candidates, key=candidates.get))
    pprint(inputTags)

    # Write to the caller-supplied destination; the previous version ignored
    # `devout` and always wrote to a hard-coded path on one machine.
    writeout(inputTokens, inputTags, devout)
def simpleSentimentAnalysis(train, devin, devout):
    """
    Run the `simple` predictor over the tokens read from `devin` and write
    the token/tag pairs to `devout`.

    Args:
        train: Passed to `parseTrainFile`; the first two items of its result
            are used as the training tokens and tags.
        devin: Passed to `parseFileInput` to obtain the tokens to tag.
        devout: Output destination handed to `writeout`.
    """
    parsedTrain = parseTrainFile(train)
    trainTokens, trainTags = parsedTrain[0], parsedTrain[1]

    # Emission parameters are the only model input the simple predictor needs.
    emissions = calculateEmission(trainTags, trainTokens, 3)

    devTokens = parseFileInput(devin)
    writeout(devTokens, simple(devTokens, emissions), devout)
def maxMarginalSentimentAnalysis(train, devin, devout):
    """
    Decode every tweet in `devin` with `maxMarginal` and write the resulting
    tags to `devout`.

    Args:
        train: Passed to `parseTrainFile`; the first two items of its result
            are used as the training tokens and tags.
        devin: Read twice: by `parseMMInput` to get the tweets to decode, and
            by `parseFileInput` to get the token sequence handed to
            `writeout`.
        devout: Output destination handed to `writeout`.
    """
    parsedTrain = parseTrainFile(train)
    trainTokens = parsedTrain[0]
    trainTags = parsedTrain[1]

    emissions = calculateEmission(trainTags, trainTokens, 3)
    transitions = calculateTransition(trainTags)

    # Flatten the per-tweet predictions into one list, closing each tweet
    # with a None separator.
    flatTags = []
    for tweet in parseMMInput(devin):
        flatTags.extend(maxMarginal(tweet, transitions, emissions))
        flatTags.append(None)

    writeout(parseFileInput(devin), flatTags, devout)
def improvedMaxMarginalSentimentAnalysis(train, devin, devout):
    """
    Max-marginal tagging using the improved train/input parsers
    (`parseTrainP5` / `parseInputP5`); results are written to `devout`.

    Steps:
      1. Estimate emission and transition parameters from the training data.
      2. Run one augmentation round: decode the input tweets and append the
         tweets and their predicted tags to the training token/tag lists.
      3. Decode the input tweets with the parameters from step 1 and write
         the tags out.

    Args:
        train: Passed to `parseTrainP5`; the first two items of its result
            are used as the training tokens and tags (both are appended to
            in place during step 2).
        devin: Read by `parseInputP5` for the tweets to decode and by
            `parseFileInput` for the token sequence handed to `writeout`.
        devout: Output destination handed to `writeout`.
    """
    parsedTrain = parseTrainP5(train)
    trainTokens = parsedTrain[0]
    trainTags = parsedTrain[1]

    baseEmissions = calculateEmission(trainTags, trainTokens, 3)
    baseTransitions = calculateTransition(trainTags)
    tweets = parseInputP5(devin)

    # Single augmentation round.
    # NOTE(review): the final decoding pass below uses the parameters
    # computed *before* this round, so the augmented data never feeds a
    # parameter estimate that is used for output -- confirm whether the final
    # pass was meant to use re-estimated parameters.
    for _ in range(1):
        roundTags = []
        roundEmissions = calculateEmission(trainTags, trainTokens, 3)
        roundTransitions = calculateTransition(trainTags)
        for tweet in tweets:
            roundTags.extend(maxMarginal(tweet, roundTransitions, roundEmissions))
            roundTags.append(None)
            # Grow the training tokens with this tweet plus a None separator.
            for word in tweet:
                trainTokens.append(word)
            trainTokens.append(None)
        # Grow the training tags with this round's predictions.
        for tag in roundTags:
            trainTags.append(tag)

    # Final decoding pass: one None separator closes each tweet.
    finalTags = []
    for tweet in tweets:
        finalTags.extend(maxMarginal(tweet, baseTransitions, baseEmissions))
        finalTags.append(None)

    writeout(parseFileInput(devin), finalTags, devout)