def features(inputDir):
    """Collect feature and label entries for every item of every dialogue.

    Loads all dialogues that ``tools.get_data`` yields for ``inputDir`` and,
    for each position in each dialogue, records the result of
    ``sentFeatures(dialogue, position)`` and ``sentLabels(item)``.

    Args:
        inputDir: Location handed unchanged to ``tools.get_data``.

    Returns:
        A 3-tuple ``(features, labels, csv_data)``: the flat list of feature
        entries, the parallel flat list of labels, and the list of loaded
        dialogues.
    """
    # Materialize the data so it can be iterated here and also returned.
    dialogues = list(tools.get_data(inputDir))

    feature_rows = []
    label_rows = []
    for dialogue in dialogues:
        for position, item in enumerate(dialogue):
            # Features see the whole dialogue (for context); labels only
            # need the item itself.
            feature_rows.append(sentFeatures(dialogue, position))
            label_rows.append(sentLabels(item))

    return feature_rows, label_rows, dialogues
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 11 12:45:35 2020

@author: Likhita Suresh
"""

import pycrfsuite
import hw2_corpus_tools as tools
import os

# Locate the "trainSmall" directory under the current working directory.
# os.path.join inserts the platform's own separator, so the path is valid on
# every OS (a hard-coded "\\" only produces a real sub-path on Windows).
path = os.path.join(os.getcwd(), "trainSmall")
print(path)

# Load everything tools.get_data() yields for that directory and dump it
# to stdout for inspection.
csv_data = list(tools.get_data(path))
print(csv_data)
import pycrfsuite
import hw2_corpus_tools as tools
import os, sys

# Command-line arguments, all positional and all required (a missing one
# raises IndexError here):
#   sys.argv[1] -> inputDir   : training-data location, loaded just below
#   sys.argv[2] -> testDir    : not used in this section
#   sys.argv[3] -> outputFile : not used in this section
# Earlier hard-coded alternative (train data directory under the current path):
#path = os.getcwd()+"\\trainSmall"
inputDir = sys.argv[1]
testDir = sys.argv[2]
outputFile = sys.argv[3]

# Leftover debugging output, disabled:
#print (path)

#print (csv_data[0][0][2][0].token)
# Materialize everything tools.get_data() yields for inputDir into a list.
csv_data = list(tools.get_data(inputDir))


#feature for every token
def tokenFeatures(wordList):
    """Build ``TOKEN_``-prefixed string features from a sequence of words.

    For each word, in order, the result contains ``'TOKEN_' + word.token``
    followed by one ``'TOKEN_' + pair`` entry for every tracked letter pair
    ("th", "he", "in") that occurs as a substring of the word's token. After
    all words, one feature per adjacent word pair is appended, formed by
    concatenating the two tokens with no separator.

    Args:
        wordList: Sequence of objects exposing a string ``token`` attribute.
            ``None`` or an empty sequence yields no features.

    Returns:
        List of feature strings.
    """
    if not wordList:
        return []

    letter_pairs = ("th", "he", "in")
    tokens = []
    for word in wordList:
        tokens.append('TOKEN_' + word.token)
        # Substring test must run against the token text; testing against
        # the word object itself compares whole fields (or raises).
        tokens.extend(
            'TOKEN_' + pair for pair in letter_pairs if pair in word.token
        )

    # Bigram features over adjacent words.
    tokens.extend(
        'TOKEN_' + first.token + second.token
        for first, second in zip(wordList, wordList[1:])
    )
    return tokens