예제 #1
0
def extract_compounds(candidates):
    """Keep the split-compound candidates whose two segments validate and translate.

    A candidate survives the first pass when both of its segments (indices 1
    and 2 of the candidate) are accepted by the German spell checker, or
    failing that, when both are found by ``duden_search.search``.  Each
    survivor is then translated segment by segment with
    ``google_search.search`` and kept only if both translations are truthy.

    Args:
        candidates: iterable of indexable items; items 1 and 2 are the two
            segments of a split compound.

    Returns:
        dict mapping the re-joined compound (first segment followed by the
        lower-cased second segment) to the list of its two translations.
    """
    spell = SpellChecker(language='de')

    # Pass 1: dictionary validation of both segments.
    validated = []
    for candidate in candidates:
        head, tail = candidate[1], candidate[2]
        known_to_spellchecker = spell[head] and spell[tail]
        if known_to_spellchecker or (duden_search.search(head)
                                     and duden_search.search(tail)):
            print("Assessing '{compound1}' - '{compound2}'...".format(
                compound1=head, compound2=tail))
            validated.append([head, tail])

    # Pass 2: keep only compounds for which both segments translate.
    print('\n\n\nTranslating compound segments...')
    accepted = []
    for segments in validated:
        print("Translating '{word}'...".format(word=''.join(segments).title()))
        translations = [google_search.search(part) for part in segments]
        if translations[0] and translations[1]:
            accepted.append((segments, translations))

    compounds_dictionary = {
        head + tail.lower(): translations
        for (head, tail), translations in accepted
    }

    print('\n\n\nExtracted compounds:')
    for entry in compounds_dictionary.items():
        print(entry)

    return compounds_dictionary
예제 #2
0
def retrive_from_google(query, num_pages=5):
    """
    Search Google for a keyword, cache the result pages and return their urls.

    Every result whose page is not yet stored under ``google_retrieved/`` is
    scraped with ``scrape_data`` and written to a file named after the url
    with its slashes removed.  Results that are already cached are not
    scraped again.

    Keyword arguments:
        query : Query to search on Google
        num_pages : Number of urls to return
    Return:
        results : List of urls
    """
    print('Searching google for "', query, '" ...')
    import google_search as google
    import os
    cache_dir = 'google_retrieved'
    # The cache directory may not exist on a first run.
    os.makedirs(cache_dir, exist_ok=True)
    response = google.search(query, stop=num_pages)
    results = []
    # Remember both the raw file names (what this function writes) and the
    # extension-stripped form that was matched against previously.
    cache = set()
    for cached_name in os.listdir(cache_dir):
        cache.add(cached_name)
        cache.add(''.join(cached_name.split('.')[:-1]))
    pbar = ProgressBar()
    for link in pbar(response):
        results.append(link)
        fname = ''.join(link.split('/'))
        if fname in cache:
            continue
        text = scrape_data(link)
        # Context manager guarantees the file is flushed and closed.
        with open(os.path.join(cache_dir, fname), 'w') as handle:
            handle.write(text)
        # Record the file name (not the url) so duplicates in this run are skipped.
        cache.add(fname)
    return results
예제 #3
0
파일: search.py 프로젝트: throb/scrap
def searchGoogle(productName, searchTerm, resultAmount):
    """Search for ``searchTerm`` and report result URLs outside a site whitelist.

    Args:
        productName: name stored verbatim in the report.
        searchTerm: query string handed to ``search``.
        resultAmount: passed to ``search`` as its ``stop`` argument.

    Returns:
        dict of the form ``{'product': {'name', 'searchterm', 'date', 'time',
        'sites'}}`` where ``date`` is ``'YYYY MM DD'``, ``time`` is ``'HH MM'``
        (both taken from the local clock) and ``sites`` is the list of result
        URLs that contain none of the whitelisted site names.
    """
    now = datetime.now()
    todayTime = '%02d %02d' % (now.hour, now.minute)
    todayDate = '%04d %02d %02d' % (now.year, now.month, now.day)

    urlList = list(search(searchTerm, stop=resultAmount))

    # All entries are lower-case; they are matched as substrings of the
    # lower-cased URL (not only of its host part).
    acceptableSites = (
        'amazon.com',
        'thegnomonworkshop.com',
        'cgchannel.com',
        'vray.info',
        'thefoundry.co.uk',
        'youtube.com',
        'vimeo.com',
        'pixologic.com',
        'twitter.com',
        'facebook.com',
        '3dtotal.com',
        'tutsplus.com',
        'gnomonschool.com',
        'chillingeffects.org',
        'deviantart.com',
    )

    illegalURLs = []
    for url in urlList:
        loweredUrl = url.lower()
        if any(site in loweredUrl for site in acceptableSites):
            continue
        illegalURLs.append(url)
        logger.info('Found : %s', url)

    return {
        'product': {
            'name': productName,
            'searchterm': searchTerm,
            'date': todayDate,
            'time': todayTime,
            'sites': illegalURLs,
        }
    }
예제 #4
0
	def __init__(self, term):
		"""Locate a product-reviews page for ``term`` and build the filtered list.

		Searches with ``g.search``, keeps the last result whose URL contains
		'product-reviews' (empty string when none does), downloads that page,
		feeds the first element returned by ``amazon_scraper.get_page_reviews``
		to ``calc_freq`` and stores ``generate_list()`` in ``self.filt``.
		"""
		url = ''
		for candidate in g.search(term):
			if 'product-reviews' in candidate:
				url = candidate

		self.filt = []
		self.d = {}
		html = requests.get(url).text
		reviews = amazon_scraper.get_page_reviews(html)
		self.calc_freq(reviews[0])
		self.filt = self.generate_list()
예제 #5
0
    def __init__(self, term):
        """Set up the instance from the reviews page found for ``term``.

        The last search hit whose URL mentions 'product-reviews' is fetched
        (an empty URL is used when there is no such hit); the first element of
        the parsed reviews goes through ``calc_freq`` and the outcome of
        ``generate_list()`` ends up in ``self.filt``.
        """
        review_links = [
            link for link in g.search(term) if 'product-reviews' in link
        ]
        url = review_links[-1] if review_links else ''

        self.filt = []
        self.d = {}
        parsed = amazon_scraper.get_page_reviews(requests.get(url).text)
        self.calc_freq(parsed[0])
        self.filt = self.generate_list()
예제 #6
0
async def on_message(message):
    """Answer an incoming chat message.

    * Messages written by ``client.user`` are ignored.
    * ``hi`` is answered with a greeting that mentions the author.
    * ``!google <text>`` stores the text via ``persistent_store.add_search``
      and replies with ``google_search.search``'s result.
    * ``!recent <text>`` replies with
      ``persistent_store.find_search_term``'s result.

    Returns whatever ``message.channel.send`` returns for a handled message,
    otherwise ``None``.
    """
    if message.author == client.user:
        return

    content = message.content
    author_id = message.author.id
    reply_template = "<@{0}> {1}"

    if content == 'hi':
        return await message.channel.send("<@{0}> Hey".format(author_id))

    if content.startswith('!google'):
        pieces = content.split("!google")
        if len(pieces) > 1:
            term = pieces[1]
            persistent_store.add_search(term, author_id)
            answer = google_search.search(term)
            return await message.channel.send(
                reply_template.format(author_id, answer))

    if content.startswith('!recent'):
        pieces = content.split("!recent")
        if len(pieces) > 1:
            history = persistent_store.find_search_term(pieces[1], author_id)
            return await message.channel.send(
                reply_template.format(author_id, history))
예제 #7
0
def get_message(bot, update):
    """Acknowledge the incoming text, then reply with ``search``'s result for it.

    ``bot`` is accepted for the handler signature but not used here.
    """
    incoming = update.message
    incoming.reply_text("got text")
    incoming.reply_text(search(incoming.text))
예제 #8
0
def getResponse(msg, ints, intents_json):
    """Produce the bot's reply for the top predicted intent.

    Special tags (lyrics, music, news, weather, quotations, search) write
    their content straight into ``ChatLog`` and return a short status string.
    Any other tag returns a random entry from the ``responses`` of the intent
    whose ``tag`` matches.

    Args:
        msg: the user's message text, forwarded to the content helpers.
        ints: predicted intents; only ``ints[0]['intent']`` is read.
        intents_json: dict with an ``'intents'`` list; each entry is read for
            its ``'tag'`` and ``'responses'`` keys.

    Returns:
        The reply string, or ``None`` when the intent list is empty or no
        intent matches the tag.
    """
    tag = ints[0]['intent']
    list_of_intents = intents_json['intents']
    for intent in list_of_intents:
        if tag == "lyrics":
            # NOTE(review): the intro line is drawn from whichever intent the
            # loop is currently on, not necessarily the "lyrics" intent.
            response = random.choice(intent['responses'])
            ChatLog.insert(END, "Chatty: " + response + '\n\n')
            content = song_lyrics(msg)
            if isinstance(content, str):
                ChatLog.insert(END, "Chatty: " + content + '\n\n')
                return "here are you results."
            elif isinstance(content, tuple):
                for data in content:
                    ChatLog.insert(END, "Chatty: " + data + '\n\n')
                return "here are you results."

        elif tag == "music":
            content = music(msg)
            if isinstance(content, str):
                ChatLog.insert(END, "Chatty: " + content + '\n\n')
            elif isinstance(content, tuple):
                title, flink, vlink = content

                def audio_link():
                    webbrowser.open_new(flink)

                def video_link():
                    webbrowser.open_new(vlink)

                ChatLog.insert(END, "Chatty: " + title + '\n\n')
                ChatLog.insert(END, "Audio link: " + flink + '\n\n',
                               hyperlink.add(audio_link))
                ChatLog.insert(END, "Video link: " + vlink + '\n\n',
                               hyperlink.add(video_link))
            return "here are you results."

        elif tag == 'news':
            title, description, short_link = get_news()
            # Show at most five items; zip stops early if fewer are available.
            headlines = list(zip(title, description, short_link))[:5]
            for headline, summary, link in headlines:

                # Bind the URL as a default argument: a plain closure would
                # read the loop variable when clicked, i.e. always the last
                # item's link.
                def news_link(url=link):
                    webbrowser.open_new(url)

                ChatLog.insert(END,
                               "News: " + headline + ' - \n' + summary)
                ChatLog.insert(END, "\nclick here for brief.\n\n",
                               hyperlink.add(news_link))
            return "here are your result."

        elif tag == 'weather':
            weather_report = get_weather_data(msg)
            ChatLog.insert(END, "Chatty: " + weather_report + '\n\n')
            return "here are your results"

        elif tag == 'quotations':
            quoto = get_quotations()
            ChatLog.insert(END, "Chatty: " + quoto + '\n\n')
            return "here are your result"

        elif tag == 'search':
            search(msg)
            return "open your browser if it's not opened automatically"

        elif intent['tag'] == tag:
            return random.choice(intent['responses'])
예제 #9
0
파일: bot_01.py 프로젝트: Abhi2955/chatbot
    if (choice == 1):
        request = input('you: ')  # user requesting here with a message
    else:
        request = listencommand()

    response = bot.get_response(request)  #bot responding to their request
    # action based on response of bot
    if (response == "bye"):
        #
        strrespo = str(response) + " Happy to help you"
        print('Bot: ', strrespo)  # printing the response of Bot
        speak(strrespo)
        break
    elif (request == google + request[6:]):
        speak("here we go")
        google_search.search(request[6:])
    elif (response == "google on" or response == "google about"
          or response == "google it" or response == "google this"
          or response == "search on google"):
        #
        print(response)
        strres = str(response)
        strlen = len(strres)
        finalstr = request[strlen:]
        speak("here we go")
        google_search.search(finalstr[0:])
    elif (response == "wiki about" or response == "wikipedia about"
          or response == "tell me about" or response == "tell me"
          or response == "who is" or response == "what is a"
          or response == "what is an" or response == "wikipedia"
          or response == "wiki on" or response == "what is"):
예제 #10
0
def _listen_with_retry():
    """Listen for a phrase, re-listening until something is actually heard.

    Returns:
        The lower-cased phrase, or ``None`` if the user said "cancel" after
        being re-prompted.
    """
    heard = the_voice.listening()
    if heard is None:
        print("Sorry, didn't get you!")
        print("Please say the name of file to search, or \"CANCEL\" to cancel")
        # Keep listening until recognition yields something.
        while heard is None:
            heard = the_voice.listening()
        if heard.lower() == "cancel":
            return None
    return heard.lower()


def start(cmd):
    """Dispatch a spoken command to the matching assistant feature.

    Args:
        cmd: the recognised command text; matched by substring tests.

    Returns:
        1 when a command was recognised (including when the user cancelled
        it), 0 when ``cmd`` matched nothing.
    """
    if "find meaning" in cmd or ("tell" in cmd and "meaning" in cmd) or ("find" in cmd and "meaning" in cmd):
        the_voice.say_and_print("Say the word to get meaning")
        a = _listen_with_retry()
        if a is None:
            return 1
        the_voice.say_and_print("Finding meaning...")
        tell_meaning.meaning(a)
        return 1

    elif "search google" in cmd or "find on google" in cmd or ("search" in cmd and "net" in cmd) or (
            "search" in cmd and "google" in cmd):
        the_voice.say_and_print("What do you want to search?")
        a = _listen_with_retry()
        if a is None:
            return 1
        the_voice.say_and_print("Searching...")
        google_search.search(a)
        return 1

    elif "open site" in cmd or ("open" in cmd and "site" in cmd):
        the_voice.say_and_print("which site do you want to open?")
        a = _listen_with_retry()
        if a is None:
            return 1
        the_voice.say_and_print("Opening Site...")
        frequent_site.site(a)
        return 1

    elif "set reminder" in cmd or ("set" in cmd and "reminder" in cmd):
        the_voice.say_and_print("What shall I remind you about?")
        a = _listen_with_retry()
        if a is None:
            return 1
        the_voice.say_and_print("Setting reminder...")
        the_voice.say_and_print("In how many seconds you want to be reminded?")
        while True:
            temp = the_voice.listening()
            try:
                b = int(temp)
                break
            except (TypeError, ValueError):
                # TypeError: nothing was heard (None); ValueError: not a number.
                if temp is not None and temp.lower() == "cancel":
                    return 1
                the_voice.say_and_print("Please tell after how many seconds you want to be reminded, or say \"CANCEL\" to cancel")
        set_reminder.task(a, b)
        return 1

    elif "movies nearby" in cmd or "search movies" in cmd or ("movies" in cmd and "nearby" in cmd) or (
            "search" in cmd and "movies" in cmd):
        the_voice.say_and_print("Showing Movies Near by...")
        movies_nearby.movies("movies nearby")
        return 1

    elif "plan travel" in cmd or ("plan" in cmd and "travel" in cmd) or ("book" in cmd and "hotel" in cmd) or (
            "book" in cmd and "flight" in cmd) or ("book" in cmd and "train" in cmd) or (
            "book" in cmd and "bus" in cmd):
        the_voice.say_and_print("Plan Your Travel")
        plan_travel.travel()
        return 1

    elif "latest updates" in cmd or ("latest" in cmd and "updates" in cmd) or ("tell" in cmd and "news" in cmd):
        the_voice.say_and_print("Showing Latest Feeds...")
        news_update.news()
        return 1

    elif "cricket score" in cmd or ("cricket" in cmd and "score" in cmd) or ("live" in cmd and "cricket" in cmd):
        the_voice.say_and_print("Showing Cricket Score...")
        cricket_score.score()
        return 1

    return 0
예제 #11
0
         bot.get_response(request))  #bot responding to their request
     strrespo = response + ", Happy to help you"
     print('Bot:', strrespo)  # printing the response of Bot
     speak(strrespo)
     break
 elif ("#wiki" in request):
     speak("sure")
     reqind = request.find("#wiki")
     response = wiki_search.wiki(request[reqind + 5:])
     print("Bot:", response)
     speak(response)
 elif ("#google" in request):
     speak("sure")
     speak("here we go")
     reqind = request.find("#google ")
     response = google_search.search(request[reqind + 7:])
     print("check it out please")
     speak("check it out please")
 elif ("#humidity" in request):
     reqind = request.find("#humidity ")
     fun_humidity(reqind + 10)
 elif ("#temp " in request):
     reqind = request.find("#temp ")
     fun_temp(reqind + 5)
 elif ("#weather " in request):
     reqind = request.find("#weather ")
     fun_complete_weather(reqind + 9)
 elif ("#weather_status " in request):
     reqind = request.find("#weather_status ")
     fun_weather(reqind + 16)
 elif ("#wind_speed " in request):
    def main(self):
        """Look up each requested hotel and extract its details.

        Input is chosen from instance attributes, in this order:
        ``incoming_data_file`` (a list of queries read from that file),
        ``search_domain`` (prints a not-implemented notice and returns), or
        ``search_hotel_name`` with optional ``search_hotel_location`` (a single
        query).  With none of them set an error is logged and the method
        returns.

        For every query, link lookup is tried via ``get_links_bing_search``,
        then ``get_links_google_search``, then a direct ``search`` call.  The
        links go to ``extract_trip_advisor_details``; its result is stored in
        ``self.results`` (overwritten per query) and, in file mode, written to
        ``output_data_file`` between an opening '[' and a closing ']'.
        """
        if self.incoming_data_file:
            queries = IO.read_list_of_lists_from_file_usung_csv_v1(self.incoming_data_file)
            # Open the list in the output file; it is closed after the loop.
            IO.save_text_to_file('[', self.output_data_file)
        elif self.search_domain:
            print self.search_domain
            print "Opps!!! Stopped ... Not implemented for domain search yet"
            return
        elif self.search_hotel_name:
            #print self.search_hotel_name
            # Single ad-hoc query laid out as [serial number, name, location].
            search_query = [ 1, self.search_hotel_name , '']
            if self.search_hotel_location:
                search_query[2] = self.search_hotel_location
            queries = []
            queries.append(search_query)

        else:
            logging.error('Something went Wrong, Cant Proceed Further.Stop')
            return     
        
        for query_list in queries:
            try:
                # Each field is read by position; a failed read yields None.
                try:
                    sr_no = query_list[0]
                except:
                    sr_no = None    
                
                try:
                    hotel_name = query_list[1]
                except:
                    hotel_name = None

                try:
                    hotel_location = query_list[2]
                except:
                    hotel_location = None
    
                # Nothing to search for without a hotel name.
                if not hotel_name:
                    continue

                #self.input_search_hotel_name =  query_list[0]   
                query_dict = {'Hotel_Name': hotel_name, 'Hotel_Location': hotel_location, 'Sr_No': sr_no}

                logging.info('To Find : {}'.format(query_dict))
                
                # NOTE(review): `domain` is not read anywhere in this method.
                domain = 'http://www.tripadvisor.com/Hotel_Review-'
                # Keyword arguments for the last-resort `search` call below.
                params = {'lang': 'en', 'tld': 'com'}
                params['pause'] =  2.0
                params['stop']= 1
                params['only_standard']= True
                params['start']=  0
                params['num']= 1

                query = 'Site : {} {} {}'.format(str(self.base_url),hotel_name,hotel_location)
                #print query
                
                # First attempt: Bing search for the hotel.
                links_to_hotels = self.get_links_bing_search(query_dict)
                
                if not links_to_hotels:
                    # Bing returned no URL: fall back to the Google search helper.
                    links_to_hotels = self.get_links_google_search(query_dict)

                if not links_to_hotels:
                    # Last try: call `search` directly with the query string.
                    # NOTE(review): the log line mentions a proxy, but no proxy
                    # setting is visible in `params` - confirm.
                    logging.info("IN Google SEARCH With Proxy")
                    links_to_hotels =  [url for url in search(query, **params)]

                logging.info('Parse Link To Soup : {} '.format(links_to_hotels)) 
                
                hotel_data =self.extract_trip_advisor_details(query_dict,links_to_hotels)
                self.results = hotel_data
                
                if self.incoming_data_file:
                    self.save_item_to_file(hotel_data,self.output_data_file)


            except Exception as e:
                    # Log and move on to the next query; this `continue` also
                    # skips the sleep below.
                    logging.error('IN MAIN :: {} '.format(str(e)))
                    continue           
            # Pause between queries.
            time.sleep(3)
            
        if self.incoming_data_file:
            IO.save_text_to_file(']', self.output_data_file)     # close json list