Example No. 1
0
def document_stuff(
    directory_location, input_file_name, marker_list, output_file_name, var_x
):
    """Run the full CHILDES alignment pipeline for one input file.

    Loads the corpus (stemmed or raw, per the module-level ``Stemmed``
    flag), groups utterances into conversations, optionally maps markers
    to category labels, and writes the final info to a CSV file in this
    order: [DOC ID, speaker, replier, speaker words to replier total,
    replier words to speaker total, marker, conditional number, speaker
    marker number, reply marker number, replier utterance number].

    Returns the value of ``alignment.calculateAlignments`` (previously
    this was computed but silently discarded).
    """
    global ordered_utterance_list
    global convo_dict
    global sparsity_measure
    global output_almost
    global final_counter
    global alignment_dict
    global possible_conversation_list
    global speaker_list
    initialize()
    # Kept as an explicit == False comparison in case Stemmed is not a
    # plain boolean (e.g. 0/1 sentinel set elsewhere in the module).
    if Stemmed == False:
        get_childes_files(directory_location, input_file_name)
    else:
        get_childes_stemmed(directory_location, input_file_name)

    determine_speakers(ordered_utterance_list)
    determine_possible_conversations(speaker_list)
    squisher(ordered_utterance_list)
    convo_grouper(squished_dict)
    calculate_sparsity(speaker_list, convo_dict)
    # NOTE(review): `corpus` is not a parameter here — presumably a module
    # global set before this call; confirm against the caller.
    utterances = convo_converter(corpus, input_file_name, convo_dict, marker_list)
    if useCategories:
        # Re-label each utterance's markers with their category names
        # before alignment is calculated.
        catdict = alignment.makeCatDict(marker_list, useREs)
        for utterance in utterances:
            utterance["msgMarkers"] = alignment.determineCategories(
                utterance["msgTokens"], catdict, useREs
            )
            utterance["replyMarkers"] = alignment.determineCategories(
                utterance["replyTokens"], catdict, useREs
            )
        marker_list = list(catdict.keys())
    # Bug fix: return the results instead of discarding them; callers that
    # ignored the (previously None) return value are unaffected.
    return alignment.calculateAlignments(
        utterances, marker_list, 1, output_file_name, var_x, "CHILDES"
    )
Example No. 2
0
def document_stuff(
    directory_location, input_file_name, marker_list, output_file_name, var_x
):
    """Compute CHILDES alignments for one input file and write them to CSV.

    Output column order: [DOC ID, speaker, replier, speaker words to
    replier total, replier words to speaker total, marker, conditional
    number, speaker marker number, reply marker number, replier
    utterance number].
    """
    global ordered_utterance_list
    global convo_dict
    global sparsity_measure
    global output_almost
    global final_counter
    global alignment_dict
    global possible_conversation_list
    global speaker_list
    initialize()
    # Pick the corpus loader according to the module-level Stemmed flag.
    loader = get_childes_files if Stemmed == False else get_childes_stemmed
    loader(directory_location, input_file_name)

    determine_speakers(ordered_utterance_list)
    determine_possible_conversations(speaker_list)
    squisher(ordered_utterance_list)
    convo_grouper(squished_dict)
    calculate_sparsity(speaker_list, convo_dict)
    utterances = convo_converter(corpus, input_file_name, convo_dict, marker_list)
    if useCategories:
        # Replace raw markers with their category labels before alignment.
        catdict = alignment.makeCatDict(marker_list, useREs)
        for utt in utterances:
            utt["msgMarkers"] = alignment.determineCategories(
                utt["msgTokens"], catdict, useREs
            )
            utt["replyMarkers"] = alignment.determineCategories(
                utt["replyTokens"], catdict, useREs
            )
        marker_list = list(catdict.keys())
    results = alignment.calculateAlignments(
        utterances, marker_list, 1, output_file_name, var_x, 'CHILDES'
    )
def document_stuff(directory_location, input_file_name, marker_list, output_file_name, corpus):
	"""Run the CHILDES pipeline (with CHI marker extraction) for one file.

	Writes the final info to a csv file in this order: [DOC ID, speaker,
	replier, speaker words to replier total, replier words to speaker
	total, marker, conditional number, speaker marker number, reply
	marker number, replier utterance number].
	"""
	global ordered_utterance_list
	global convo_dict
	global sparsity_measure
	global output_almost
	global final_counter
	global alignment_dict
	global possible_conversation_list
	global speaker_list
	initialize()
	# Consistency fix: the two Stemmed branches are mutually exclusive in
	# the other variants of this function, so use if/else rather than two
	# independent `if` tests.
	if Stemmed == False:
		get_childes_files(directory_location, input_file_name)
	else:
		get_childes_stemmed(directory_location, input_file_name)
	determine_speakers(ordered_utterance_list)
	# Pull the child's ('CHI') markers out of the ordered utterances.
	magic_marker_grabber(ordered_utterance_list, 'CHI')
	determine_possible_conversations(speaker_list)
	squisher(ordered_utterance_list)
	convo_grouper(squished_dict)
	calculate_sparsity(speaker_list, convo_dict)

	utterances = convo_converter(corpus, input_file_name, convo_dict, marker_list)
	# NOTE(review): `var_x` is not a parameter of this variant — it will
	# raise NameError unless it is a module global; confirm before use.
	results = alignment.calculateAlignments(utterances, marker_list, 0, output_file_name, var_x)
Example No. 4
0
# Shuffle the tweet rows when any shuffling option was requested; log the
# first row before and after so the effect of shuffling is visible.
if someShuffling:
    logger1.log(rows[0])
    rows = shuffleUtterances(
        rows, shuffleIds, shuffleTweets, shuffleMarkers, combineMsgReply
    )
    logger1.log(rows[0])

# Attach user info and extract markers from each message.
utterances = transformCSV(markers, users, rows)

# When categories are enabled, re-label every utterance's markers with
# their category names before alignment is calculated.
if useCategories:
    catdict = alignment.makeCatDict(markers, useREs)
    pprint(catdict)
    for utt in utterances:
        utt["msgMarkers"] = alignment.determineCategories(
            utt["msgMarkers"], catdict, useREs
        )
        utt["replyMarkers"] = alignment.determineCategories(
            utt["replyMarkers"], catdict, useREs
        )
    pprint(utterances[0]["msgMarkers"])
    markers = list(catdict.keys())

# Compute the alignments, write the output, and close out the run.
results = alignment.calculateAlignments(
    utterances, markers, smoothing, outputFile, shouldWriteHeader,
    corpusType='Twitter'
)
logger1.finish(start)
Example No. 5
0
# Load the user table and the tweet CSV, splitting the result into its
# row data and the marker list.
logger1.log("Reading user info...")
users = readUserInfo()
logger1.log("Reading messages...")
result = readCSV(inputFile, users, numMarkers)
rows = result["rows"]
markers = result["markers"]

# Shuffle the rows when any shuffling option was requested; log the first
# row before and after so the effect is visible.
if someShuffling:
    logger1.log(rows[0])
    rows = shuffleUtterances(
        rows, shuffleIds, shuffleTweets, shuffleMarkers, combineMsgReply
    )
    logger1.log(rows[0])

# Attach user info and extract markers from each message.
utterances = transformCSV(markers, users, rows)

# When categories are enabled, re-label every utterance's markers with
# their category names before alignment is calculated.
if useCategories:
    catdict = alignment.makeCatDict(markers, useREs)
    pprint(catdict)
    for utt in utterances:
        utt["msgMarkers"] = alignment.determineCategories(
            utt["msgMarkers"], catdict, useREs
        )
        utt["replyMarkers"] = alignment.determineCategories(
            utt["replyMarkers"], catdict, useREs
        )
    pprint(utterances[0]["msgMarkers"])
    markers = list(catdict.keys())

# Compute the alignments, write the output, and close out the run.
results = alignment.calculateAlignments(
    utterances, markers, smoothing, outputFile, shouldWriteHeader,
    corpusType='Twitter'
)
logger1.finish(start)