Example #1
0
def main():
    """Run the sentiment pipeline over the files in ``data/``.

    Steps:
      1. ``parseFile.getDict('data/*')`` loads the records.
      2. ``buildJsonArray.getTextAndId`` splits them into the JSON text
         payload plus parallel id / text / author lists.
      3. ``apiCall.getSentimentResults`` is called once with the payload.
      4. Each result is stored under its document id, with the comment text
         and author attached.
      5. The mapping is written to ``results/sentimentResults.json``.
    """
    # Read files in the data dir and get a dictionary of records
    print("Reading Files... \n")
    result_dict = parseFile.getDict('data/*')

    # Build the JSON text payload and the parallel lookup lists
    print("%d Records. Preparing Data... \n" % len(result_dict))
    textIdDict = buildJsonArray.getTextAndId(result_dict)
    textJsonString = textIdDict["jsonTextList"]
    idList = textIdDict["idList"]
    textList = textIdDict["textList"]
    authorList = textIdDict["authorList"]

    # Sentiment analysis on the text. This is done exactly once: the
    # request and the compilation loop used to be repeated verbatim, which
    # doubled the API usage without changing the output.
    print("Calling Sentiment Analysis API... \n")
    sentimentJsonArray = apiCall.getSentimentResults(textJsonString)

    # Associate each result with its document id. The lists are indexed
    # rather than zipped so that a response shorter than idList still fails
    # loudly with IndexError instead of silently dropping documents.
    print("Compiling Results... \n")
    mySentimentDict = {}
    for index, documentId in enumerate(idList):
        entry = sentimentJsonArray[index]
        # The result object itself is annotated in place.
        entry["commentText"] = textList[index]
        entry["author"] = authorList[index]
        mySentimentDict[documentId] = entry

    # Write sentiment results to file
    print("Writing to results/sentimentResults.json... \n")
    fileIO.writeJsonFile(mySentimentDict, 'results/sentimentResults.json')

    print("Done")
Example #2
0
def main():
    """Download every comment of a docket, one page per output file.

    Command line: ``getData <API_KEY> <DOCKET_ID>``.  When fewer than two
    arguments are given, the usage line is printed and the process exits
    with status 1.

    The total number of records is read from a first API response, then
    the comments are fetched in pages and each page is written to
    ``data/file<index>`` through ``fileIO.writeJsonFile``.
    """
    # The API key and docket id come from the command line
    if len(sys.argv) <= 2:
        print("SYNTAX: getData <API_KEY> <DOCKET_ID>")
        # sys.exit is always available (quit() is only injected by `site`),
        # and a usage error should not report success.
        sys.exit(1)
    API_KEY = sys.argv[1]
    DOCKET_ID = sys.argv[2]

    # The API returns at most this many comments per call
    pageSize = 1000

    # Get the number of comments. Only "totalNumRecords" is read from this
    # response.
    # NOTE(review): the 22000 offset of this probing call looks arbitrary;
    # confirm it is intentional.
    commentData = apiCall.getLastNComments(API_KEY, DOCKET_ID, str(pageSize),
                                           str(22000))
    # Normalise to int: the value is used both in a message and in
    # arithmetic, which cannot both work on the raw value (str + int or
    # str / int raises TypeError).
    numberofComments = int(commentData["totalNumRecords"])
    print("Fetching %d comments." % numberofComments)

    # Call the API 1 + numberofComments // pageSize times. When the total
    # is an exact multiple of the page size, the last page is requested
    # past the end of the data.
    numberofIterations = numberofComments // pageSize + 1
    for index in range(numberofIterations):
        PAGE_OFFSET = index * pageSize
        commentData = apiCall.getLastNComments(API_KEY, DOCKET_ID,
                                               str(pageSize),
                                               str(PAGE_OFFSET))
        fileIO.writeJsonFile(commentData, "data/file" + str(index))
    print("Done.")
def getGender(authorList):
    """Query the gender API for author first names and save the results.

    For every entry of ``authorList`` the first space-separated token is
    taken as the first name, stripped of non-word characters and appended
    to a query string as ``name[<i>]=<name>&``.  After every 9 names the
    string is sent with ``callGenderAPI`` and the response is handed to
    ``processResults`` together with the full author names of that batch.
    Any names left over after the loop are sent and processed the same way.
    The collected results are written to ``results/genderResults.json``.

    Args:
        authorList: sequence of author name strings.

    Returns:
        None.
    """
    batchSize = 9    # names per call; indices run name[1] .. name[9]
    callCost = 10    # how much one call adds to the budget counter
    budgetCap = 900  # stop reading names once the counter exceeds this

    counter = 0
    apiLimit = 0
    apiString = ""
    myNameList = []
    # Bound up front so the final write also works when fewer than
    # batchSize names were supplied.
    myJsonDict = {}

    for fname in authorList:
        counter += 1
        myNameList.append(fname)
        # Get only the first name; leading blanks are dropped before the
        # split so that " John Smith" yields "John" rather than "".
        name = fname.lstrip(' ').split(' ')[0]
        # re.sub returns the cleaned string, so the result must be kept.
        name = re.sub(r'\W+', '', name)
        # Generate string in required API format
        apiString = apiString + "name[%d]=%s&" % (counter, name)
        # Check the call budget
        if apiLimit > budgetCap:
            break
        if counter == batchSize:
            # Call the API and collect the results of this batch
            results = callGenderAPI(apiString)
            # NOTE(review): assumes processResults returns a dict for the
            # batch - confirm. Merging keeps earlier batches instead of
            # overwriting them.
            myJsonDict.update(processResults(results, myNameList))
            print("Processing \n")
            # Reset the per-batch state
            apiString = ""
            counter = 0
            apiLimit += callCost
            myNameList = []

    # Call the API one last time for the remaining names and keep the
    # response, which would otherwise be lost.
    if apiString:
        results = callGenderAPI(apiString)
        myJsonDict.update(processResults(results, myNameList))

    # Write gender results to file
    print("Writing to results/genderResults.json... \n")
    fileIO.writeJsonFile(myJsonDict, 'results/genderResults.json')
def getGender(authorList):
    """Query the gender API for author first names and save the results.

    For every entry of ``authorList`` the first space-separated token is
    taken as the first name, stripped of non-word characters and appended
    to a query string as ``name[<i>]=<name>&``.  After every 9 names the
    string is sent with ``callGenderAPI`` and the response is handed to
    ``processResults`` together with the full author names of that batch.
    Any names left over after the loop are sent and processed the same way.
    The collected results are written to ``results/genderResults.json``.

    Args:
        authorList: sequence of author name strings.

    Returns:
        None.
    """
    batchSize = 9    # names per call; indices run name[1] .. name[9]
    callCost = 10    # how much one call adds to the budget counter
    budgetCap = 900  # stop reading names once the counter exceeds this

    counter = 0
    apiLimit = 0
    apiString = ""
    myNameList = []
    # Bound up front so the final write also works when fewer than
    # batchSize names were supplied.
    myJsonDict = {}

    for fname in authorList:
        counter += 1
        myNameList.append(fname)
        # Get only the first name; leading blanks are dropped before the
        # split so that " John Smith" yields "John" rather than "".
        name = fname.lstrip(' ').split(' ')[0]
        # re.sub returns the cleaned string, so the result must be kept.
        name = re.sub(r'\W+', '', name)
        # Generate string in required API format
        apiString = apiString + "name[%d]=%s&" % (counter, name)
        # Check the call budget
        if apiLimit > budgetCap:
            break
        if counter == batchSize:
            # Call the API and collect the results of this batch
            results = callGenderAPI(apiString)
            # NOTE(review): assumes processResults returns a dict for the
            # batch - confirm. Merging keeps earlier batches instead of
            # overwriting them.
            myJsonDict.update(processResults(results, myNameList))
            print("Processing \n")
            # Reset the per-batch state
            apiString = ""
            counter = 0
            apiLimit += callCost
            myNameList = []

    # Call the API one last time for the remaining names and keep the
    # response, which would otherwise be lost.
    if apiString:
        results = callGenderAPI(apiString)
        myJsonDict.update(processResults(results, myNameList))

    # Write gender results to file
    print("Writing to results/genderResults.json... \n")
    fileIO.writeJsonFile(myJsonDict, 'results/genderResults.json')