Beispiel #1
0
def _search_hits(query):
    """Run ``bing_api(query)`` and return its ``['d']['results']`` list."""
    return bing_api(query)['d']['results']


def _top_up_hits(hits, fallback_query):
    """Prepend the hits of ``fallback_query`` when fewer than 5 hits are known.

    With no hits at all this equals replacing them by the fallback results;
    with 1-4 hits the fallback results are placed in front of the known ones.
    """
    if len(hits) < 5:
        return _search_hits(fallback_query) + hits
    return hits


def _find_paragraphs(url, searchTerm, prefix, suffix):
    """Call ``findSentence`` with mode 1 and retry with mode 0 if nothing is found.

    NOTE(review): mode 1 presumably means "exact phrase" and mode 0
    "paragraphs containing fragments" -- confirm against findSentence.
    """
    paragraphs = findSentence(url, searchTerm, prefix, suffix, 1)
    if len(paragraphs[0]) < 1:
        paragraphs = findSentence(url, searchTerm, prefix, suffix, 0)
    return paragraphs


def _collect_paragraphs(hits, searchTerm, prefix, suffix):
    """Gather candidate paragraphs from the page behind every search hit.

    Returns a list of ``(paragraph, clean_paragraph, hit_index, exact)``
    tuples, where ``hit_index`` is the position of the originating hit in
    ``hits`` and ``exact`` is the truthiness of element 2 of the
    ``findSentence`` result (presumably "exact phrasing found").
    """
    found = []
    for hit_index, hit in enumerate(hits):
        paragraphs = _find_paragraphs(hit['Url'], searchTerm, prefix, suffix)
        # assumes paragraphs[0] and paragraphs[1] are parallel lists
        for paragraph, clean in zip(paragraphs[0], paragraphs[1]):
            found.append((paragraph, clean, hit_index, bool(paragraphs[2])))
    return found


def _prefer_exact(found):
    """Keep only exact-phrase candidates; if there are none, keep them all."""
    exact = [item for item in found if item[3]]
    return exact or found


def googleSearch(searchTerm):
    """Assemble anecdote, problem, Wikipedia and solution texts for a term.

    Despite its name the function searches through ``bing_api``. For each
    section it runs a quoted query, falls back to an unquoted one, extracts
    candidate paragraphs from the hit pages with ``findSentence`` and picks
    one using the ranking helpers defined elsewhere in the project. Progress
    and the chosen texts are printed to stdout.

    Args:
        searchTerm: the term to search for (concatenated into query strings).

    Returns:
        ``[anecdote, problem, wikipedia, solution]``; a section for which
        nothing was found is the empty string.
    """
    #   AUTHOR: Kristin Funch
    print(searchTerm)
    print('\n\n')
    relevantWords = ' '
    result1 = ''
    result2 = ''
    result3 = ''
    result4 = ''

    print('ANECDOTE\n')
    hits = _search_hits('"when I first heard about ' + searchTerm + '"')
    # Fewer than 5 exact-phrase hits: add the unquoted query's hits in front.
    hits = _top_up_hits(hits, 'when I first heard about ' + searchTerm)
    candidates = _prefer_exact(
        _collect_paragraphs(hits, searchTerm, 'when I first heard about ', ''))
    if candidates:
        # Element 1 of the ranking result is used as the anecdote index.
        parIndex = bagOfWordsGivenParagraphs([c[1] for c in candidates])[1]
        paragraph, _, hit_index, _ = candidates[parIndex]
        top2 = bagOfWordsUrlTopTwo(hits[hit_index]['Url'], searchTerm)
        # The two words from the anecdote page refine the later queries.
        relevantWords = relevantWords + top2[0] + ' ' + top2[1] + ' '
        print(top2[0])
        print('\n')
        print(top2[1])
        print('\n')
        print(paragraph)
        result1 = paragraph
        print('\n')

    print('PROBLEM\n')
    hits = _search_hits('"the problem with ' + searchTerm + ' is"')
    if len(hits) < 1:
        hits = _search_hits('the problem with ' + searchTerm + relevantWords)
    # This section uses every candidate, exact or not.
    candidates = _collect_paragraphs(
        hits, searchTerm, 'the problem with ', ' is')
    if candidates:
        # Rank once and reuse the index (the ranking used to run twice).
        parIndex = bagOfWordsGivenParagraphs([c[1] for c in candidates])[0]
        result2 = candidates[parIndex][0]
        print(result2)
        print('\n')

    print('WIKIPEDIA\n')
    wiki_hits = _search_hits(
        searchTerm + relevantWords + ' site:en.wikipedia.org')
    # Guard against an empty result list, which used to raise IndexError.
    if wiki_hits:
        # Fetch/rank the page once instead of once for print and once for use.
        result3 = bagOfWordsByParagraph(wiki_hits[0]['Url'])[0]
        print(result3)
        print('\n')

    print('SOLUTION\n')
    hits = _search_hits('"solution to ' + searchTerm + '"')
    if len(hits) < 1:
        hits = _search_hits('solution to ' + searchTerm + relevantWords)
    candidates = _prefer_exact(
        _collect_paragraphs(hits, searchTerm, 'solution to ', ''))
    if candidates:
        # Element 2 of the ranking result is used as the solution index.
        parIndex = bagOfWordsGivenParagraphs([c[1] for c in candidates])[2]
        result4 = candidates[parIndex][0]
        print(result4)
        print('\n')

    return [result1, result2, result3, result4]
Beispiel #2
0
def _search_hits(query):
    """Run ``bing_api(query)`` and return its ``['d']['results']`` list."""
    return bing_api(query)['d']['results']


def _top_up_hits(hits, fallback_query):
    """Prepend the hits of ``fallback_query`` when fewer than 5 hits are known.

    With no hits at all this equals replacing them by the fallback results;
    with 1-4 hits the fallback results are placed in front of the known ones.
    """
    if len(hits) < 5:
        return _search_hits(fallback_query) + hits
    return hits


def _find_paragraphs(url, searchTerm, prefix, suffix):
    """Search a page for the exact phrase, then for fragments if none is found.

    The last ``findSentence`` argument selects the mode: 1 for the exact
    phrase, 0 for paragraphs containing fragments.
    """
    paragraphs = findSentence(url, searchTerm, prefix, suffix, 1)
    if len(paragraphs[0]) < 1:
        paragraphs = findSentence(url, searchTerm, prefix, suffix, 0)
    return paragraphs


def _collect_paragraphs(hits, searchTerm, prefix, suffix):
    """Gather candidate paragraphs from the page behind every search hit.

    Returns a list of ``(paragraph, clean_paragraph, hit_index, exact)``
    tuples, where ``hit_index`` is the position of the originating hit in
    ``hits`` and ``exact`` is the truthiness of element 2 of the
    ``findSentence`` result (exact phrasing found).
    """
    found = []
    for hit_index, hit in enumerate(hits):
        paragraphs = _find_paragraphs(hit['Url'], searchTerm, prefix, suffix)
        # assumes paragraphs[0] and paragraphs[1] are parallel lists
        for paragraph, clean in zip(paragraphs[0], paragraphs[1]):
            found.append((paragraph, clean, hit_index, bool(paragraphs[2])))
    return found


def _prefer_exact(found):
    """Keep only exact-phrase candidates; if there are none, keep them all."""
    exact = [item for item in found if item[3]]
    return exact or found


def googleSearch(searchTerm):
    """Assemble anecdote, problem, Wikipedia and solution texts for a term.

    Despite its name the function searches through ``bing_api``. Each
    section starts with a quoted (exact) query and widens it step by step
    while fewer than 5 hits are known, extracts candidate paragraphs from
    the hit pages with ``findSentence`` and picks one using the ranking
    helpers defined elsewhere in the project. Progress and the chosen texts
    are printed to stdout.

    Args:
        searchTerm: the term to search for (concatenated into query strings).

    Returns:
        ``[anecdote, problem, wikipedia, solution]``; a section for which
        nothing was found is the empty string.
    """
    print(searchTerm)
    print('\n\n')
    relevantWords = ' '
    result1 = ''
    result2 = ''
    result3 = ''
    result4 = ''
    top2 = []

    print('ANECDOTE\n')
    hits = _search_hits('"when I first heard about ' + searchTerm + '"')
    hits = _top_up_hits(hits, 'when I first heard about ' + searchTerm)
    candidates = _prefer_exact(
        _collect_paragraphs(hits, searchTerm, 'when I first heard about ', ''))
    if candidates:
        # Element 1: best ranked by anecdote keywords (top2 is still empty).
        parIndex = bagOfWordsGivenParagraphs(
            [c[1] for c in candidates], top2)[1]
        paragraph, _, hit_index, _ = candidates[parIndex]
        # Top two words on the chosen page that are not in the search term.
        top2 = bagOfWordsUrlTopTwo(hits[hit_index]['Url'], searchTerm)
        relevantWords = relevantWords + top2[0] + ' ' + top2[1] + ' '
        print(top2[0])
        print('\n')
        print(top2[1])
        print('\n')
        result1 = paragraph
        print(result1)
        print('\n')

    print('PROBLEM\n')
    hits = _search_hits('"the problem with ' + searchTerm + ' is"')
    # Widen first with the relevant words, then without them.
    hits = _top_up_hits(
        hits, 'the problem with ' + searchTerm + relevantWords)
    # The plain fallback must use searchTerm; it previously concatenated the
    # list searchTermIndices to the string, which always raised TypeError.
    hits = _top_up_hits(hits, 'the problem with ' + searchTerm)
    # This section uses every candidate, exact or not.
    candidates = _collect_paragraphs(
        hits, searchTerm, 'the problem with ', ' is')
    if candidates:
        parIndex = bagOfWordsGivenParagraphs(
            [c[1] for c in candidates], top2)[0]
        result2 = candidates[parIndex][0]
        print(result2)
        print('\n')

    print('WIKIPEDIA\n')
    wiki_hits = _search_hits(
        searchTerm + relevantWords + ' site:en.wikipedia.org')
    if len(wiki_hits) < 1:
        # Retry without the relevant words.
        wiki_hits = _search_hits(searchTerm + ' site:en.wikipedia.org')
    if wiki_hits:
        result3 = bagOfWordsByParagraph(wiki_hits[0]['Url'])[0]
        # Strip anything between square brackets, angle brackets and
        # parentheses, in that order.
        result3 = re.sub(r'\[[^\]]*\]', ' ', result3)
        result3 = re.sub(r'\<[^\>]*\>', ' ', result3)
        result3 = re.sub(r'\([^\)]*\)', ' ', result3)
        print(result3)
        print('\n')

    print('SOLUTION\n')
    hits = _search_hits('"solution to ' + searchTerm + '"')
    hits = _top_up_hits(hits, 'solution to ' + searchTerm + relevantWords)
    hits = _top_up_hits(hits, 'solution to ' + searchTerm)
    candidates = _prefer_exact(
        _collect_paragraphs(hits, searchTerm, 'solution to ', ''))
    if candidates:
        # Element 2 of the ranking result is used as the solution index.
        parIndex = bagOfWordsGivenParagraphs(
            [c[1] for c in candidates], top2)[2]
        result4 = candidates[parIndex][0]
        print(result4)
        print('\n')

    return [result1, result2, result3, result4]
Beispiel #3
0
def _search_hits(query):
    """Run ``bing_api(query)`` and return its ``['d']['results']`` list."""
    return bing_api(query)['d']['results']


def _top_up_hits(hits, fallback_query):
    """Prepend the hits of ``fallback_query`` when fewer than 5 hits are known.

    With no hits at all this equals replacing them by the fallback results;
    with 1-4 hits the fallback results are placed in front of the known ones.
    """
    if len(hits) < 5:
        return _search_hits(fallback_query) + hits
    return hits


def _find_paragraphs(url, searchTerm, prefix, suffix):
    """Search a page for the exact phrase, then for fragments if none is found.

    The last ``findSentence`` argument selects the mode: 1 for the exact
    phrase, 0 for paragraphs containing fragments.
    """
    paragraphs = findSentence(url, searchTerm, prefix, suffix, 1)
    if len(paragraphs[0]) < 1:
        paragraphs = findSentence(url, searchTerm, prefix, suffix, 0)
    return paragraphs


def _collect_paragraphs(hits, searchTerm, prefix, suffix):
    """Gather candidate paragraphs from the page behind every search hit.

    Returns a list of ``(paragraph, clean_paragraph, hit_index, exact)``
    tuples, where ``hit_index`` is the position of the originating hit in
    ``hits`` and ``exact`` is the truthiness of element 2 of the
    ``findSentence`` result (exact phrasing found).
    """
    found = []
    for hit_index, hit in enumerate(hits):
        paragraphs = _find_paragraphs(hit['Url'], searchTerm, prefix, suffix)
        # assumes paragraphs[0] and paragraphs[1] are parallel lists
        for paragraph, clean in zip(paragraphs[0], paragraphs[1]):
            found.append((paragraph, clean, hit_index, bool(paragraphs[2])))
    return found


def _prefer_exact(found):
    """Keep only exact-phrase candidates; if there are none, keep them all."""
    exact = [item for item in found if item[3]]
    return exact or found


def googleSearch(searchTerm):
    """Assemble anecdote, problem, Wikipedia and solution texts for a term.

    Despite its name the function searches through ``bing_api``. Each
    section starts with a quoted (exact) query and widens it step by step
    while fewer than 5 hits are known, extracts candidate paragraphs from
    the hit pages with ``findSentence`` and picks one using the ranking
    helpers defined elsewhere in the project. Progress and the chosen texts
    are printed to stdout.

    Args:
        searchTerm: the term to search for (concatenated into query strings).

    Returns:
        ``[anecdote, problem, wikipedia, solution]``; a section for which
        nothing was found is the empty string.
    """
    print(searchTerm)
    print('\n\n')
    relevantWords = ' '
    result1 = ''
    result2 = ''
    result3 = ''
    result4 = ''
    top2 = []

    print('ANECDOTE\n')
    hits = _search_hits('"when I first heard about ' + searchTerm + '"')
    hits = _top_up_hits(hits, 'when I first heard about ' + searchTerm)
    candidates = _prefer_exact(
        _collect_paragraphs(hits, searchTerm, 'when I first heard about ', ''))
    if candidates:
        # Element 1: best ranked by anecdote keywords (top2 is still empty).
        parIndex = bagOfWordsGivenParagraphs(
            [c[1] for c in candidates], top2)[1]
        paragraph, _, hit_index, _ = candidates[parIndex]
        # Top two words on the chosen page that are not in the search term.
        top2 = bagOfWordsUrlTopTwo(hits[hit_index]['Url'], searchTerm)
        relevantWords = relevantWords + top2[0] + ' ' + top2[1] + ' '
        print(top2[0])
        print('\n')
        print(top2[1])
        print('\n')
        result1 = paragraph
        print(result1)
        print('\n')

    print('PROBLEM\n')
    hits = _search_hits('"the problem with ' + searchTerm + ' is"')
    # Widen first with the relevant words, then without them.
    hits = _top_up_hits(
        hits, 'the problem with ' + searchTerm + relevantWords)
    # The plain fallback must use searchTerm; it previously concatenated the
    # list searchTermIndices to the string, which always raised TypeError.
    hits = _top_up_hits(hits, 'the problem with ' + searchTerm)
    # This section uses every candidate, exact or not.
    candidates = _collect_paragraphs(
        hits, searchTerm, 'the problem with ', ' is')
    if candidates:
        parIndex = bagOfWordsGivenParagraphs(
            [c[1] for c in candidates], top2)[0]
        result2 = candidates[parIndex][0]
        print(result2)
        print('\n')

    print('WIKIPEDIA\n')
    wiki_hits = _search_hits(
        searchTerm + relevantWords + ' site:en.wikipedia.org')
    if len(wiki_hits) < 1:
        # Retry without the relevant words.
        wiki_hits = _search_hits(searchTerm + ' site:en.wikipedia.org')
    if wiki_hits:
        result3 = bagOfWordsByParagraph(wiki_hits[0]['Url'])[0]
        # Strip anything between square brackets, angle brackets and
        # parentheses, in that order.
        result3 = re.sub(r'\[[^\]]*\]', ' ', result3)
        result3 = re.sub(r'\<[^\>]*\>', ' ', result3)
        result3 = re.sub(r'\([^\)]*\)', ' ', result3)
        print(result3)
        print('\n')

    print('SOLUTION\n')
    hits = _search_hits('"solution to ' + searchTerm + '"')
    hits = _top_up_hits(hits, 'solution to ' + searchTerm + relevantWords)
    hits = _top_up_hits(hits, 'solution to ' + searchTerm)
    candidates = _prefer_exact(
        _collect_paragraphs(hits, searchTerm, 'solution to ', ''))
    if candidates:
        # Element 2 of the ranking result is used as the solution index.
        parIndex = bagOfWordsGivenParagraphs(
            [c[1] for c in candidates], top2)[2]
        result4 = candidates[parIndex][0]
        print(result4)
        print('\n')

    return [result1, result2, result3, result4]