def document_stuff(directory_location, input_file_name, marker_list, output_file_name, var_x):
    # Writes the final info to a CSV file in this order:
    # [DOC ID, speaker, replier, speaker words to replier total,
    #  replier words to speaker total, marker, conditional number,
    #  speaker marker number, reply marker number, replier utterance number]
    global ordered_utterance_list
    global convo_dict
    global sparsity_measure
    global output_almost
    global final_counter
    global alignment_dict
    global possible_conversation_list
    global speaker_list

    initialize()

    # Read the CHILDES transcripts, either raw or pre-stemmed
    if not Stemmed:
        get_childes_files(directory_location, input_file_name)
        # if extractRoles:  # comment this and next line out eventually
        #     return
    else:
        get_childes_stemmed(directory_location, input_file_name)

    # Build the conversation structure: speakers, candidate speaker pairs,
    # collapsed utterances, grouped conversations, and sparsity statistics
    determine_speakers(ordered_utterance_list)
    determine_possible_conversations(speaker_list)
    squisher(ordered_utterance_list)
    convo_grouper(squished_dict)
    calculate_sparsity(speaker_list, convo_dict)

    utterances = convo_converter(corpus, input_file_name, convo_dict, marker_list)

    # If we're using categories, re-process marker tokens into their category
    # labels before calculating alignment
    if useCategories:
        catdict = alignment.makeCatDict(marker_list, useREs)
        # pprint(catdict)
        for i in range(0, len(utterances)):
            utterances[i]["msgMarkers"] = alignment.determineCategories(
                utterances[i]["msgTokens"], catdict, useREs)
            utterances[i]["replyMarkers"] = alignment.determineCategories(
                utterances[i]["replyTokens"], catdict, useREs)
        # pprint(utterances[0]["msgMarkers"])
        marker_list = list(catdict.keys())

    # Calculate alignment and write the results to the output file
    results = alignment.calculateAlignments(utterances, marker_list, 1,
                                            output_file_name, var_x, "CHILDES")
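# Example invocation (a rough sketch, not taken from the original project: the
# directory, file names, marker list, and var_x value below are hypothetical
# placeholders, and module-level settings such as Stemmed, useCategories,
# corpus, and useREs must already be configured elsewhere in this file):
#
#     document_stuff("data/childes/", "childes_transcripts.csv",
#                    ["you", "i", "the", "a"],
#                    "childes_alignment_results.csv", 1)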
# Reading in user info and tweets
logger1.log("Reading user info...")
users = readUserInfo()
logger1.log("Reading messages...")
result = readCSV(inputFile, users, numMarkers)
rows = result["rows"]
markers = result["markers"]

# Shuffling tweets if any shuffling has been requested
if someShuffling:
    logger1.log(rows[0])
    rows = shuffleUtterances(rows, shuffleIds, shuffleTweets, shuffleMarkers,
                             combineMsgReply)
    logger1.log(rows[0])

# Adding user info & extracting markers from messages
utterances = transformCSV(markers, users, rows)

# If we're using categories, re-process markers into their category labels
# before calculating alignment
if useCategories:
    catdict = alignment.makeCatDict(markers, useREs)
    pprint(catdict)
    for i in range(0, len(utterances)):
        utterances[i]["msgMarkers"] = alignment.determineCategories(
            utterances[i]["msgMarkers"], catdict, useREs)
        utterances[i]["replyMarkers"] = alignment.determineCategories(
            utterances[i]["replyMarkers"], catdict, useREs)
    pprint(utterances[0]["msgMarkers"])
    markers = list(catdict.keys())

# Calculate alignment, print & finish
results = alignment.calculateAlignments(utterances, markers, smoothing,
                                        outputFile, shouldWriteHeader,
                                        corpusType='Twitter')
logger1.finish(start)