import re
import csv
import nltk
import os
import sys
from mychildes import CHILDESCorpusReaderX  #modified nltk
import alignment
import logger1
from pprint import pprint
from nltk.stem import *
from nltk.stem.snowball import SnowballStemmer

logger1.initialize()

#corpus = 'Kuczaj'
smoothing_values = [0, 1]
#outputFile = "Kuczaj300Results.csv"
#markersFile = "wordlists/KuczajMarker300.csv"
#corpus_dir =  'data/Kuczaj/'
#corpus_name = 'Kuczaj'
#marker_list = alignment.readMarkers(markersFile)
speaker_list = []
utterance_dict = {}
squished_dict = {}
convo_dict = {}
squish_counter = 0
convo_counter = 0
total_utterance_reply_dict = {}
total_marker_speaker_dict = {}
total_marker_reply_dict = {}
conditional_conversation_dict = {}
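
The Kuczaj configuration above is commented out; here is a minimal sketch of how it would likely be wired up, assuming CHILDESCorpusReaderX keeps the stock nltk CHILDESCorpusReader interface (root directory plus a fileid pattern) and that alignment.readMarkers takes the CSV path as shown. The fileid pattern and the participants() call are assumptions, not shown in the excerpt:

# Hypothetical usage of the commented-out settings; '.*.xml' and
# participants() are assumptions based on the stock CHILDESCorpusReader.
corpus_dir = 'data/Kuczaj/'
markersFile = "wordlists/KuczajMarker300.csv"
marker_list = alignment.readMarkers(markersFile)
corpus_reader = CHILDESCorpusReaderX(corpus_dir, '.*.xml')
for fileid in corpus_reader.fileids():
    # one participant dict per file
    speaker_list.extend(corpus_reader.participants(fileid))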
Example no. 2
            else:
                # messages and replies were drawn from a single shuffled
                # token pool: even-numbered slices hold message tokens,
                # odd-numbered slices hold reply tokens
                utterances[i]["msgTokens"] = allMsgTokens[msgMarkerCount:(
                    msgMarkerCount + msgLengths[2 * i])]
                msgMarkerCount += msgLengths[2 * i]
                msgLengthsNew.append(msgLengths[2 * i])
                utterances[i]["replyTokens"] = allMsgTokens[msgMarkerCount:(
                    msgMarkerCount + msgLengths[2 * i + 1])]
                msgMarkerCount += msgLengths[2 * i + 1]
                replyLengthsNew.append(msgLengths[2 * i + 1])
        utterances[i]["convId"] = (utterances[i]["msgUserId"],
                                   utterances[i]["replyUserId"])
    return utterances


#Core calls
start = logger1.initialize()

#Reading in user info and tweets
logger1.log("Reading user info...")
users = readUserInfo()
logger1.log("Reading messages...")
result = readCSV(inputFile, users, numMarkers)
rows = result["rows"]
markers = result["markers"]

#Shuffling tweets if any shuffling has been requested
if someShuffling:
    logger1.log(rows[0])
    rows = shuffleUtterances(rows, shuffleIds, shuffleTweets, shuffleMarkers,
                             combineMsgReply)
    logger1.log(rows[0])
Example no. 3
import re
import csv
import nltk
import os
from mychildes import CHILDESCorpusReaderX #modified nltk
import logger1
import alignment
from nltk.stem import *
from nltk.stem.snowball import SnowballStemmer

logger1.initialize()

master_var = True
outputFile = "recursive_markers_results2.csv"
corpus_dir = r'C:\Users\Aaron\AppData\Roaming\nltk_data\corpora\childes\Providence'
corpus_name = 'Providence'
temp_marker_list = []
markers = []
speaker_list = []
utterance_dict = {}
squished_dict = {}
convo_dict = {}
convo_counter = 0
squish_counter = 0
total_utterance_reply_dict = {}
total_marker_speaker_dict = {}
total_marker_reply_dict = {}
conditional_conversation_dict = {}
alignment_dict = {}
word_count_dict = {}
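
Both CHILDES examples import SnowballStemmer but the excerpts break off before it is used; presumably the marker words collected into temp_marker_list are stemmed before matching, along these lines. The "english" stemmer and the loop itself are assumptions, not shown above:

# Hypothetical stemming step over the marker list; not confirmed by
# the excerpts, which end before SnowballStemmer is called.
stemmer = SnowballStemmer("english")
for marker in temp_marker_list:
    markers.append(stemmer.stem(marker))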
Example no. 4
				msgMarkerCount += msgLengths[i]
				utterances[i]["replyTokens"] = allReplyTokens[replyMarkerCount:(replyMarkerCount+replyLengths[i])]
				replyMarkerCount += replyLengths[i]
			else:
				utterances[i]["msgTokens"] = allMsgTokens[msgMarkerCount:(msgMarkerCount+msgLengths[2*i])]
				msgMarkerCount += msgLengths[2*i]
				msgLengthsNew.append(msgLengths[2*i])
				utterances[i]["replyTokens"] = allMsgTokens[msgMarkerCount:(msgMarkerCount+msgLengths[2*i+1])]
				msgMarkerCount += msgLengths[2*i+1]
				replyLengthsNew.append(msgLengths[2*i+1])
		utterances[i]["convId"] = (utterances[i]["msgUserId"],utterances[i]["replyUserId"])
	return utterances


#Core calls
start = logger1.initialize()

#Reading in user info and tweets
logger1.log("Reading user info...")
users = readUserInfo()
logger1.log("Reading messages...")
result = readCSV(inputFile, users, numMarkers)
rows = result["rows"]
markers = result["markers"]

#Shuffling tweets if any shuffling has been requested
if someShuffling:
	logger1.log(rows[0])
	rows = shuffleUtterances(rows, shuffleIds, shuffleTweets, shuffleMarkers, combineMsgReply)
	logger1.log(rows[0])
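
Examples no. 2 and no. 4 show the same token-reassignment loop with its header and if-branch truncated. Below is a hedged reconstruction combining the if-branch lines visible in Example no. 4 with the else-branch shown in both; the loop header, the combineMsgReply condition, and the first msgTokens assignment of the if-branch are assumptions inferred from the surrounding calls, not confirmed by the excerpts:

# Hedged reconstruction of the truncated loop; combineMsgReply and the
# first if-branch assignment are assumptions.
for i in range(len(utterances)):
    if not combineMsgReply:
        # separate pools: messages and replies were shuffled independently
        utterances[i]["msgTokens"] = allMsgTokens[msgMarkerCount:(msgMarkerCount + msgLengths[i])]
        msgMarkerCount += msgLengths[i]
        utterances[i]["replyTokens"] = allReplyTokens[replyMarkerCount:(replyMarkerCount + replyLengths[i])]
        replyMarkerCount += replyLengths[i]
    else:
        # single pool: even-numbered slices are messages, odd-numbered are replies
        utterances[i]["msgTokens"] = allMsgTokens[msgMarkerCount:(msgMarkerCount + msgLengths[2 * i])]
        msgMarkerCount += msgLengths[2 * i]
        utterances[i]["replyTokens"] = allMsgTokens[msgMarkerCount:(msgMarkerCount + msgLengths[2 * i + 1])]
        msgMarkerCount += msgLengths[2 * i + 1]
    utterances[i]["convId"] = (utterances[i]["msgUserId"], utterances[i]["replyUserId"])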