def do_query(query, print_flag=False, *, n_result=10):
    try:
        from googlesearch import search
    except ImportError:
        print("No module named 'google' found")

    # to search
    # query = "Geeksforgeeks"
    for j in search(query, tld="com", num=n_result, stop=1, pause=2):
        if print_flag:
            print(j)
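# Usage sketch for do_query (an illustration, not part of the original module):
# it assumes the `googlesearch` package imported above is installed and that
# outbound network access is available. The query string is arbitrary.
if __name__ == "__main__":
    do_query("python requests tutorial", print_flag=True, n_result=5)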
speak("According to Wikipedia") speak(results) except Exception as e: print(e) sys.stdout.flush() speak(e) elif 'search google' in query: speak("What do you want me to search?") print("Listening.....") sys.stdout.flush() gs = takeCommand().lower() speak("Searching Google...") arr = list(search(gs, tld='co.in', lang='en', num=5, start=0, stop=5, pause=1.2)) clone_arr = list() for i in range(len(arr)): clone_arr.append(f"{i+1}. " + arr[i]) google_outs = '' google_outs += "<br>".join(clone_arr) print(google_outs+"<br>"*2 + "Listening....") sys.stdout.flush() # time.sleep(1.5) speak("Do you want me to open any of these??")
def get_arxiv_urls(paper_titles, output_json_dir=None):
    """Get a list of arxiv urls from paper_titles

    One way to do it is to install googler and use it:
    ```
    for filename in files[:1]:
        os.system('googler -n 1 "{}" arxiv.org filetype:pdf'.format(filename))
    ```
    Note that google search has a query limit. Per the instructions here:
    https://github.com/abenassi/Google-Search-API/blob/master/google/modules/utils.py#L81
    >> You may also wanna wait some time between queries, say, randint(50,65)
    >> between each query, and randint(180,240) every 100 queries, which is
    >> what I found useful.

    Here we use the python lib googlesearch for portability.

    TODO: try https://github.com/abenassi/Google-Search-API to avoid the double query
    """
    urls = []
    for idx_query, paper_title in tqdm(list(enumerate(paper_titles[:]))):
        success = False
        num_trial = 0
        sleep_in_seconds = 0
        query_results = []
        query = '{} arxiv.org'.format(paper_title)
        print(query)

        # try up to num_trial times, sleeping sleep_in_seconds between
        # attempts, before moving on to the next search
        while not success and num_trial < 2:
            time.sleep(sleep_in_seconds)
            try:
                # convert the returned iterator to a list to catch the error here
                query_results = list(
                    googlesearch.search(query, stop=1, pause=2))
            except:
                sleep_in_seconds = random.randint(180, 240) * (num_trial + 1)
                print('Warning: sleep and retry in {} seconds'.format(
                    sleep_in_seconds))
                num_trial += 1
                continue
            success = True

        for url in query_results:
            if 'arxiv.org/pdf' in url:
                # cannot scrape the title of a web page containing a pdf file
                continue
            # sometimes google gives a numeric IP instead of arxiv.org
            url = 'https://arxiv.org/' + '/'.join(
                url.replace('https://', '').split('/')[-2:])
            try:
                url_title = google_scrape(url)
            except:
                url_title = ''
            print('url ', url)
            print('url title ', url_title)
            urls.append({
                'url_title': url_title,
                'url': url,
                'query': paper_title,
            })

    if output_json_dir and os.path.isdir(output_json_dir):
        with open(os.path.join(output_json_dir, 'query_urls.json'), 'a') as f_out:
            json.dump(urls, f_out, indent=4, sort_keys=True)
    return urls
def searching(item):
    # `search_list` is expected to be defined at module level in the original source
    results = search(item, num_results=5)
    for i in results:
        search_list.append(i)
    return search_list
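# Usage sketch for searching() (illustrative only): it assumes the newer
# `googlesearch-python` package (`from googlesearch import search`) and the
# module-level `search_list` the fragment above relies on.
search_list = []
links = searching("open source licence comparison")
print(links)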
import webbrowser

from googlesearch import search
import pyqrcode
from pyqrcode import QRCode

f = open("glinks", 'w')
web = input("ENTER TOPIC TO SEARCH ON GOOGLE :")
url_list = []
x = 1
for i in search(web, tld="co.in", lang="eng", num=3, start=0, stop=3, pause=2):
    url_list.append(i)
    print(i)
    url = pyqrcode.create(i)
    webbrowser.open(i)
    url.svg("qrcode" + str(x) + ".svg", scale=6)
    x = x + 1
cursor = connection.cursor()
cursor.execute(sql_select_Query)
records = cursor.fetchall()
print("Number of games: ", cursor.rowcount)
print("\nFor each game now:")

# row[0] = id, row[1] = name
for row in records:
    games[row[0]] = row[1]

# searching
for id in games:
    query = "Buy " + games[id] + " digital copy"
    links = []
    domains = []
    for url in search(query, tld="co.in", num=8, stop=8, pause=2):
        domain = url.split("//")[-1].split("/")[0]
        if domain not in domains:
            domains.append(domain)
            links.append(url)
        if len(links) == 3:
            break
    for i in range(0, len(links)):
        try:
            sql_insert_Query = "insert into store_links (id_game, domain_name, link) values (%s, %s, %s)"
            val = (id, domains[i], links[i])
            cursor2 = connection.cursor()
            cursor2.execute(sql_insert_Query, val)
            print("inserted ", games[id])
        except Error as e:
def scrape_company(company, max_depth=MAX_DEPTH, max_entry_links=MAX_ENTRY_LINKS, max_total_links=MAX_TOTAL_LINKS): # Open up Selenium web browser options = webdriver.ChromeOptions() options.add_argument('headless') options.add_argument('--incognito') options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64 ' 'AppleWebKit/537.36 (KHTML, like Gecko) ' 'Chrome/70.0.3538.77 Safari/537.36') driver = webdriver.Chrome(options=options) # Find company website, abbreviation for url in search(company + ' careers website', stop=1): company_website = urlparse(url).netloc company_abbrev = company.lower().replace(' ', '').replace('.', '') print('Company is {0}.'.format(company)) print('Company careers website is {0}.'.format(company_website)) # Intially, frontier is populated a Google search # These links must originate at the company website found earlier frontier = PriorityQueue() visited = set() search_term = company + ' internship apply' for entry_link in search(search_term, stop=max_entry_links): if urlparse(entry_link).netloc != company_website: continue entry_link = normalize_link(entry_link) heuristic = score_link_heuristic(entry_link, company) frontier.put((-heuristic, (entry_link, 1))) visited.add(entry_link) # This method allows us to vet & add child links to the frontier def explore_child_links(links): for link in links: # Filter out obviously bad links if not link or len(link) <= 2 or link[0] == '#' or \ 'javascript:void' in link or link.startswith('mailto'): continue # Fix relative links, trim trailing slashes, etc link = normalize_link(link, parent=current, parent_base=current_base) # Links must either be tied to the company or # an external job application website domain = urlparse(link).netloc if domain != company_website and \ company_abbrev not in domain and \ 'taleo' not in domain and \ 'workday' not in domain and \ 'greenhouse' not in domain and \ 'jobvite' not in domain and \ 'icims' not in domain: continue # PDF or image links should not be followed if link.endswith('.pdf') or link.endswith('.jpg') or \ link.endswith('.jpg'): continue # Skip links that have already been added to the frontier if link in visited: continue heuristic = score_link_heuristic(link, company_abbrev) frontier.put((-heuristic, (link, depth + 1))) visited.add(link) results = [] link_idx = 0 while frontier and link_idx < max_total_links: # Extract current link we are on and the link's root (excludes path) neg_heuristic, (current, depth) = frontier.get() current_parse = urlparse(current) current_loc = current_parse.netloc current_base = current_parse.scheme + '://' + current_loc # TODO: Convert from printing to logging print('Visiting ... 
{0} (depth={1}, lh={2})'.format( current, depth, -neg_heuristic)) # Use Selenium to fetch our page, wait a bit for the page to load driver.get(current) time.sleep(2) content = driver.page_source # Determine whether a page is explorable without doing # any HTML parsing by doing some primitive checks lcontent = content.lower() explorable = False for keyword in ['job', 'career', 'intern']: if keyword in lcontent: explorable = True break if not explorable: continue link_idx += 1 # Parse HTML using BS4, discard links in header and footer soup = BeautifulSoup(content, 'lxml') if soup.header: soup.header.decompose() if soup.footer: soup.footer.decompose() # Assign score to page based off of BS4 parse page_score = score_page(soup, company_abbrev) iframes = driver.find_elements_by_tag_name('iframe') for iframe in iframes: driver.switch_to.frame(iframe) isoup = BeautifulSoup(driver.page_source, 'lxml') if isoup.header: isoup.header.decompose() if isoup.footer: isoup.footer.decompose() page_score = max(page_score, score_page(isoup, company_abbrev)) driver.switch_to.default_content() if page_score > 0: results.append((current, page_score)) # Child exploration cannot exceed the given maximum depth if depth < max_depth: # Collect links from anchor tags explore_child_links( [a.get('href') for a in soup.find_all('a', href=True)]) # Collect links from each iframe separately for iframe in iframes: driver.switch_to.frame(iframe) isoup = BeautifulSoup(driver.page_source, 'lxml') if isoup.header: isoup.header.decompose() if isoup.footer: isoup.footer.decompose() explore_child_links( [a.get('href') for a in isoup.find_all('a', href=True)]) # Close the browser instance that Selenium opened driver.close() # Find all result links that have the maximum score if results: max_score = max([score for _, score in results]) max_links = [link for link, score in results if score == max_score] return (max_links, max_score) else: return ([], 0)
selection = int(input(
    "\nPlease select a dork:\n[1] cat\n[2] id\n[3] article\n[4] page\n[5] bookid\n[6] Custom dork\n"
))
if selection == 6:
    dork = input(
        "\nPlease enter the dork (ex: for ?id= you would just enter id)\n")
else:
    dork = dorkList[selection - 1]

finalDork = "inurl: ?" + dork + "="
searchAmount = int(input("\nPlease enter an amount of links to test:\n"))
print("\nSearching and testing...\n")

for x in search(finalDork, tld='com', lang='en', num=searchAmount,
                start=0, stop=searchAmount, pause=2.0):
    url = x + "'"
    res = requests.get(url)
    html_page = res.content
    soup = BeautifulSoup(html_page, 'html.parser')
    text = soup.find_all(text=True)
    for y in text:
        if y.find("You have an error in your SQL") != -1:
            foundUrls.append(x)
            print("\nVulnerable site found:", x)

file = open("output.txt", "a")
for x in foundUrls:
    file.write(x + "\n")
elif press == 5:
    os.system("date +'%F %T'")
elif press == 6:
    print("are you sure ? y/n")
    access = input()
    if access == "y":
        os.system("reboot")
    elif access == "n":
        print("Process aborted")
    else:
        print("wrong keyword pressed")
elif press == 7:
    name = input("Enter your search keyword : ")
    os.system('firefox --search {}'.format(name))
elif press == 8:
    # Install mpg123 packages. (yum install -y mpg123)
    file = "/root/Downloads/hindi.mp3"
    os.system("mpg123 " + file)
elif press == 9:
    # Install 'pip3 install google'.
    try:
        from googlesearch import search
    except ImportError:
        print("No module named 'google' found")
    keyword = input("Enter your keyword to search : ")
    for name in search(keyword, tld="com", num=5, stop=5, pause=2):
        print(name)
def runvisuals(self): #Have to make user_id set in the sessions #GOOGLE APIS try: from googlesearch import search except ImportError: print("We could not find it at all!") query = "Instagram" for i in search(query, tld="co.in", num=10, stop=10, pause=2): print(i) self.winfo_toplevel().title("Perseus 1.1.0") #self.parent.geometry('2000x900') self.parent.configure(background="grey") #sideFrame = Frame(self.parent,height=1500,width=100, relief=SUNKEN) #sideFrame.grid(row=0,column=0, sticky='w') newFrame = Frame(self.parent, height=1500,width=150, relief=SUNKEN) newFrame.grid(row=0,column=1, sticky='ne',padx=(0,10)) e = Entry(newFrame, width=150); e.grid(row=0,column=0,padx=(0,10)) def Search(): try: from googlesearch import search except ImportError: print("We could not find it at all!") query = e.get() r = 2 for i in search(e.get(), tld="co.in", num=10, stop=10, pause=2): LabelResult = Label(newFrame, text=i) LabelResult.grid(row=r, column=0, sticky='nw', padx=(0,10)) ButtonOptions = Button(newFrame, text="Save", command=lambda:SaveLink(e.get(),i, 1)) ButtonOptions.grid(row=r, column=2, sticky='nw', padx=(0,10)) r = r + 1 searchBtn = Button(newFrame,text="Search", command=Search) searchBtn.grid(row=1,column=0, sticky='nw',padx=(0,10)) #newFrame.grid(row=2,column=0) #yscrolbar.configure(command=newSpace.yview) #xscrollbar.configure(command=newSpace.xview) """ Making the Side window with File Tree >>>>>>> 73be496... Finish p = ttk.Panedwindow(sideFrame, orient=VERTICAL) # first pane, which would get widgets gridded into it: fp = ttk.Labelframe(p, text='File Management', width=200, height=720) p.add(fp) p.pack(fill=BOTH, expand=1) #Tab view n = ttk.Notebook(fp) f1 = ttk.Frame(n, width=200, height=720) # first page, which would get widgets gridded into it <<<<<<< HEAD f2 = ttk.Frame(n, width=200, height=720) # second page n.add(f1, text='Local Files') n.add(f2, text='Online Files') n.pack(fill="both", expand=1) #Toolbar view #toolbar = Frame(frame, style='My.TFrame', height=720, width=25, relief=SUNKEN) #canvasToolbar = Canvas(toolbar, bg='blue',height=720, width=20) #toolbar.pack(side=RIGHT, fill=BOTH, expand=1) #toolbar.place(relx=1,rely=0,anchor=NE) #buttons on the toolbar """for i in range(15): button = Button(toolbar, height=1) button.pack(side=TOP, fill=BOTH, expand=1) """ """Label""" ttk.Label(f1, text="Hierachical Treeview").pack() """Treeview""" treeview=ttk.Treeview(f1) treeview.pack() """Treeview items""" ======= # second page n.add(f1, text='Saved Files') n.pack(fill="both", expand=1) #Toolbar view toolbar = Frame(newFrame, style='My.TFrame', height=720, width=25, relief=SUNKEN) canvasToolbar = Canvas(toolbar, bg='blue',height=720, width=20) toolbar.pack(side=RIGHT, fill=BOTH, expand=1) toolbar.place(relx=1,rely=0,anchor=NE)" #buttons on the toolbar #options = ["Search", "Save", "Delete"] #for i in options: # button = Button(toolbar, text=i) # button.pack(side=TOP, fill=BOTH, expand=1)""" """#Label ttk.Label(f1, text="Hierachical Treeview").pack() #Treeview treeview=ttk.Treeview(f1) treeview.pack() #Treeview items >>>>>>> 73be496... 
Finish treeview.insert('','0','item1',text='Parent tree') treeview.insert('','1','item2',text='1st Child') treeview.insert('','end','item3',text='2nd Child') treeview.insert('item2','end','A',text='A') treeview.insert('item2','end','B',text='B') treeview.insert('item2','end','C',text='C') treeview.insert('item3','end','D',text='D') treeview.insert('item3','end','E',text='E') treeview.insert('item3','end','F',text='F') treeview.move('item2','item1','end') <<<<<<< HEAD treeview.move('item3','item1','end') """Making the compression Rate Label """ #compressionRateLabel = Label(frame, text="Compression Rate : ", height=1, width=1648, bd=1, bg="grey") #compressionRateLabel.place(relx=0, rely= 1, anchor=S) #TestShape = Shapes(frame) #TestShape.circle() """ Making the menu bar for the application""" menubar = Menu(self.parent) """Making the file tree """ localFileTree = Treeview(f1) ======= treeview.move('item3','item1','end')""" """Making the compression Rate Label """ #ompressionRateLabel = Label(Shapes.frame, text="Compression Rate : ", width=1648, bd=1, bg="grey") #compressionRateLabel.place(relx=0, rely= 1, anchor=S) #TestShape = Shapes(newFrame) #TestShape.circle(10,10,100,100,"red",) """ Making the menu bar for the application""" menubar = Menu(self.parent) """Making the file tree """
#!/usr/bin/python
import webbrowser

from googlesearch import search

# to take input of search
web = input('Enter what to search')
url = []

# Now to search
for each_search in search(web, stop=5):
    url.append(each_search)
    print(each_search)                        # print each searched link
    webbrowser.open_new_tab(each_search)
    for each in search(each_search, stop=5):  # to search again in the above 5 links
        print(each)                           # print each searched link
        webbrowser.open_new_tab(each)
def get_urls(tag, n, language):
    urls = [url for url in search(tag, stop=n, lang=language)][:n]
    return urls
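# Usage sketch for get_urls (assumes `from googlesearch import search` is in
# scope; the query, result count, and language code below are only illustrative):
top_links = get_urls("machine translation survey", 5, "en")
for link in top_links:
    print(link)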
from googlesearch import search
import pyqrcode
from pyqrcode import QRCode

urlinput = input("enter text to search")
urllist = []
for i in search(urlinput, tld='com', lang='en', num=10, start=0, stop=5, pause=2):
    urllist.append(i)
    print(i)
    url = pyqrcode.create(i)

for j in range(5):
    url.svg(str(j) + ".svg", scale=8)
    print(url.terminal())
# ****
def spinning_cursor():
    while True:
        for cursor in '|/-\\':
            yield cursor


spinner = spinning_cursor()
for _ in range(100):
    sys.stdout.write(next(spinner))
    sys.stdout.flush()
    time.sleep(0.1)
    sys.stdout.write('\b')
# *****

for gamma in search(query, tld=beta, stop=50, num=10, pause=2):
    print(colored('[+] Found > ', 'yellow') + (gamma))
print(colored('[+] 20% done ', 'green'))

B = """ inurl:dtm.html intitle:1747-L551 """
query = B

# ****
def spinning_cursor():
    while True:
        for cursor in '|/-\\':
            yield cursor


spinner = spinning_cursor()
for _ in range(100):
def extractFraseGoogle(frase, cantRes): print ("Searching emails... please wait") print ("This operation may take several minutes") try: listUrl = [] count = 0 for url in search(frase, stop=cantRes): listUrl.append(url) for i in listUrl: try: req = urllib.request.Request( i, data=None, headers={ 'User-Agent': ua.random }) try: conn = urllib.request.urlopen(req) except timeout: print("Bad Url..") time.sleep(2) pass except(HTTPError, URLError): print("Bad Url..") time.sleep(2) pass status = conn.getcode() contentType = conn.info().get_content_type() if(status != 200 or contentType == "audio/mpeg"): print("Bad Url..") time.sleep(2) pass html = conn.read() soup = BeautifulSoup(html, "lxml") links = soup.find_all('a') print("They will be analyzed " + str(len(links) + 1) + " Urls..." ) time.sleep(2) for tag in links: link = tag.get('href', None) if link is not None: try: print ("Searching in " + link) if(link[0:4] == 'http'): req = urllib.request.Request( link, data=None, headers={ 'User-Agent': ua.random }) try: f = urllib.request.urlopen(req) except timeout: print("Bad Url..") time.sleep(2) pass except(HTTPError, URLError): print("Bad Url..") time.sleep(2) pass status = conn.getcode() contentType = conn.info().get_content_type() if(status != 200 or contentType == "audio/mpeg"): print("Bad Url..") time.sleep(2) pass s = f.read().decode('utf-8') emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s) for email in emails: if (email not in listUrl and email[-3:] not in imageExt): count += 1 print(str(count) + " - " + email) listUrl.append(email) if (searchEmail("Emails.db", email, frase) == 0): insertEmail("Emails.db", email, frase, link) # Sigue si existe algun error except Exception: pass print(str(count) + " emails were found") except urllib.error.URLError as e: print("Problems with the url:" + i) print(e) pass except KeyboardInterrupt: input("Press return to continue") menu() except Exception as e: print(e) input("Press enter to continue") menu()
print("Enter Your Choice : ") ch=int(input("1)TEXT\t2)VOICE")) if ch==2: r=sr.Recognizer() with sr.Microphone() as source: print("Listening...."); data=r.listen(source) print("Searching...") try: query=r.recognize_google(data) except: pass; elif ch==1: query=input("Type the Query: ") else : print("Invalid") exit url=[] for i in search(query,stop=10): print(i) url.append(i) print(url)
def start(): time.sleep(2) print("what is your name sir? ") os.startfile("wiyn.mp3") time.sleep(1.3) with mic as source: try: print("---listening---") r.adjust_for_ambient_noise(source) audio=r.listen(source) print("recognising...") name=r.recognize_google(audio) print(name) except: name=("Sir") greet= gTTS(text=name,lang="hi") greet.save("name.mp3") os.startfile("hello.mp3") os.startfile("hello.mp3") time.sleep(1.1) os.startfile("name.mp3") time.sleep(2.0) text=("I am Mouli. your virtual assistant, say ' hey molly ', if you need any help ") intro=gTTS(text=text,lang="en") intro.save("text.mp3") os.startfile("text.mp3") time.sleep(4.3) while True: bkchd=-5 with mic as source: try: print("listening") r.adjust_for_ambient_noise(source) audio=r.listen(source) print("recognising") openw=r.recognize_google(audio) if (openw=="Hemali" or openw=="hey only" or openw=="hey money" or openw=="hey Molly"): os.startfile("beep.mp3") print("verified") bkchd=5 except: a=("hye") if (bkchd>0): hcihy=gTTS(text="how can i help you?",lang="en") hcihy.save("hcihy.mp3") os.startfile("hcihy.mp3") time.sleep(2) with mic as source: try: print("---listening---") r.adjust_for_ambient_noise(source) audio=r.listen(source) print("recognising...") task=r.recognize_google(audio) print(task) except: print("could not recognise") task=("hi Molly") bkchd=-5 j = search(task,num=1,tld="com",lang="en",stop=2,pause=1,start=1) for i in j: website=i w1=task.find("remind") w2=task.find("play") w3=task.find("open") w4=task.find("music") w5=task.find("stop listening") w6=task.find("shutdown") w7=task.find("joke") w8=task.find("please molly") w9=task.find("hi Molly") if (w2>=0): os.startfile("suresir.mp3") time.sleep(3.5) ntask=(task) n1=ntask.replace("on YouTube","") n1=n1.replace("on gaana","") n2=n1.replace("play","") n3=n2.replace("video","") n0=n3.replace("song","") n5=n0.replace("1","") n5=n5.replace("2","") n5=n5.replace("3","") n5=n5.replace("4","") n5=n5.replace("5","") n5=n5.replace("6","") n5=n5.replace("7","") n5=n5.replace("8","") n5=n5.replace("9","") n5=n5.replace("0","") n4=n5.replace("and stop listining for","") n4=n4.replace("minute","") n4=n4.replace("minutes","") n4=n4.replace("second","") n4=n4.replace("seconds","") w10=task.find("movie") w11=task.find("song") if (w11>=0): w13=task.find("gaana") if (w13>=0): webbrowser.open("www.gaana.com") time.sleep(4) keyboard.press(Key.space) keyboard.release(Key.space) bkchd=-5 else: y=1 webbrowser.open("www.youtube.com") time.sleep(5) keyboard.press(Key.tab) keyboard.release(Key.tab) keyboard.press(Key.tab) keyboard.release(Key.tab) keyboard.press(Key.tab) keyboard.release(Key.tab) keyboard.type(n4) keyboard.press(Key.enter) keyboard.release(Key.enter) time.sleep(2) for y in range (1,11): keyboard.press(Key.tab) keyboard.release(Key.tab) y=y+1 time.sleep(0.5) keyboard.press(Key.enter) keyboard.release(Key.enter) bkchd=-5 elif(w10>=0): webbrowser.open("www.khatrimaza.link") bkchd=-5 else: y=1 webbrowser.open("www.youtube.com") time.sleep(5) keyboard.press(Key.tab) keyboard.release(Key.tab) keyboard.press(Key.tab) keyboard.release(Key.tab) keyboard.press(Key.tab) keyboard.release(Key.tab) keyboard.type(n4) keyboard.press(Key.enter) keyboard.release(Key.enter) time.sleep(2) for y in range (1,11): keyboard.press(Key.tab) keyboard.release(Key.tab) y=y+1 time.sleep(0.5) keyboard.press(Key.enter) keyboard.release(Key.enter) bkchd=-5 if(w7>=0): os.startfile("suresir.mp3") time.sleep(1) i=random.randint(0,1) if (i==0): joke1() elif (i==1): joke2() bkchd=-5 if(w6>=0): 
os.startfile("suresir.mp3") print("Bye...") time.sleep(2) subprocess.call(["shutdown", "-f", "-s", "-t", "60"]) bkchd=-5 if(w3>=0): os.startfile("suresir.mp3") time.sleep(2) fname1=task.replace("open","") fname2=fname1.replace("game","") fname3=fname2.replace("folder","") fname4=fname3.replace("file","") fname41=fname4.replace("calculator","calc") fname5=fname41.replace("app","") keyboard.press(Key.cmd) keyboard.release(Key.cmd) time.sleep(0.05) keyboard.type(fname5) time.sleep(0.15) keyboard.press(Key.enter) keyboard.release(Key.enter) bkchd=-5 if(w1<0 and w2<0 and w3<0 and w4<0 and w5<0 and w6<0 and w7<0 and w8<0 and w9<0): os.startfile("suresir.mp3") a=0 i=0 ntask=(task) n1=ntask.replace("on YouTube","") n1=n1.replace("on gaana","") n2=n1.replace("play","") n3=n2.replace("video","") n0=n3.replace("song","") n5=n0.replace("1","") n5=n0.replace("2","") n5=n0.replace("3","") n5=n0.replace("4","") n5=n0.replace("5","") n5=n0.replace("6","") n5=n0.replace("7","") n5=n0.replace("8","") n5=n0.replace("9","") n5=n0.replace("0","") n4=n5.replace("and stop listining for","") n4=n4.replace("minute","") n4=n4.replace("minutes","") n4=n4.replace("second","") n4=n4.replace("seconds","") for i in range(0,6): prob=task.find(question[i])+1 a=a+prob i=i+1 if(a==0): try: detail= (wikipedia.summary(task,sentences=2)) tts= gTTS(text=detail,lang="en") tts.save("a.mp3") os.startfile("a.mp3") print(detail) time.sleep(15) bkchd=-5 except: print ("I can search it for you") time.sleep(1) webbrowser.open(website) bkchd=-5 else: print ("I can search it for you") time.sleep(1) webbrowser.open(website) bkchd=-5 if(w5>=0): f1=task.find("minute") f2=task.find("minutes") a1=task.replace("a","") a2=a1.replace("b","") a3=a2.replace("c","") a4=a3.replace("d","") a5=a4.replace("e","") a6=a5.replace("f","") a7=a6.replace("g","") a8=a7.replace("h","") a9=a8.replace("i","") a10=a9.replace("j","") a11=a10.replace("k","") a12=a11.replace("l","") a13=a12.replace("m","") a14=a13.replace("n","") a15=a14.replace("o","") a16=a15.replace("p","") a17=a16.replace("q","") a18=a17.replace("r","") a19=a18.replace("s","") a20=a19.replace("t","") a21=a20.replace("u","") a22=a21.replace("v","") a23=a22.replace("w","") a24=a23.replace("x","") a25=a24.replace("y","") a26=a25.replace("z","") task=a26 a1=task.replace("A","") a2=a1.replace("B","") a3=a2.replace("C","") a4=a3.replace("D","") a5=a4.replace("E","") a6=a5.replace("F","") a7=a6.replace("G","") a8=a7.replace("H","") a9=a8.replace("I","") a10=a9.replace("J","") a11=a10.replace("K","") a12=a11.replace("L","") a13=a12.replace("M","") a14=a13.replace("N","") a15=a14.replace("O","") a16=a15.replace("P","") a17=a16.replace("Q","") a18=a17.replace("R","") a19=a18.replace("S","") a20=a19.replace("T","") a21=a20.replace("U","") a22=a21.replace("V","") a23=a22.replace("W","") a24=a23.replace("X","") a25=a24.replace("Y","") a26=a25.replace("Z","") a27=a26.replace(" ","") print(a27) if(f1>=0 or f2>=0): a0=int(a27) a4=a0*60 else: a4=int(a27) os.startfile("suresir.mp3") time.sleep(a4) bck=gTTS(text="Back again. sir",lang="en") bck.save("backagain.mp3") os.startfile("backagain.mp3") time.sleep(1.5)
try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")

# to search
query = "Geeksforgeeks"

for j in search(
        query,
        tld="co.in",
        num=10,
        stop=10,
        pause=2,
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"):
    print(j)

# import urllib.request
# import json
#
# x = urllib.request.urlopen('http://testpy.pickandpaid.com/test/')
# data = x.read()
# JSON_object = json.loads(data.decode('utf-8'))
# # print(JSON_object)
# for data in JSON_object:
#     print(JSON_object[data])
def digital_assistant(data): try: if "how are you" in data: respond("I am well") return elif "time" in data: respond(ctime()) return elif "who are you" in data or "what can you do" in data or "define yourself" in data: respond("I am viswanadh's personal assistant, I am programmed to do minor tasks like system monitoring, profiling," "predict time, take a photo, predict weather," " opening applications like youtube, google chrome ,gmail etcetre, show the top headline news and you can ask me computational or geographical questions too!") return elif "who made you" in data or "who created you" in data: respond("I was built by viswa") return elif "shutdown" in data: respond("Are you sure! you want to shutdown your computer") data = listen() if data == "yes": respond("system is going to shutdown...") os.system("taskkill /f /im Rainmeter.exe") os.system("shutdown /s /t 1") return elif "restart" in data: respond("want to restart your computer") data=listen() if data=="yes": os.system("shutdown /r /t 1") return elif "battery" in data: battery=psutil.sensors_battery() respond("Your system is at " + str(battery.percent) + " percent") return elif "cpu" in data: respond("CPU is at "+ str(psutil.cpu_percent())) return elif "music" in data: respond("Here you go with music") music_dir = "C:\\Users\\VISWANADH\\Music" song = random.choice(os.listdir(music_dir)) os.startfile(os.path.join(music_dir,song)) time.sleep(5) return elif "movie" in data: os.system("D:\\movies\\Ala_Vaikunthapurramloo.mkv") time.sleep(5) return elif "notepad" in data: os.system("notepad") return elif "open" in data: data = data.split(" ") query = data[1] for j in search(query, tld='com', lang='en', num=1, start=0, stop=1, pause=2.0): url=j webbrowser.get('chrome').open_new(url) respond(data[1] + " is open now") time.sleep(7) return elif "news" in data: query = "news" url="https://timesofindia.indiatimes.com/home/headlines" webbrowser.get('chrome').open_new(url) respond("Here are some headlines from the Times of India,Happy reading") time.sleep(5) return elif "weather" in data: data=data.split(" ") #create key: https://home.openweathermap.org/users/sign_in api_key = #################### base_url = "https://api.openweathermap.org/data/2.5/weather?" 
if "in" not in data: city_name = "kurupam" else: city_name = data[-1] complete_url = base_url + "appid=" + api_key + "&q=" + city_name response = requests.get(complete_url) x = response.json() if x["cod"] != "404": y = x["main"] current_temperature = y["temp"] current_humidiy = y["humidity"] z = x["weather"] weather_description = z[0]["description"] respond(" Temperature in kelvin unit at " + city_name + " is " + str(current_temperature) + "\n humidity in percentage is " + str(current_humidiy) + "\n description " + str(weather_description)) return else: respond(city_name + " weather details not found") return elif "something" in data: respond("Searching...") data=data[22:] data = "According to wikipedia " + wikipedia.summary(data, sentences=4) respond(data) return elif "capture the photo" in data or "take a photo" in data: ec.capture(0,False,"img.jpg") respond("photo captured successfully") return elif "video" in data or "capture the video" in data: ec.auto_vidcapture(0,False,"video.mkv",10) respond("video recorded successfully") return elif "access" in data: access() return elif "where is" in data: data = data.split(" ") name = data[-1] url = "https://www.google.com/maps/place/"+name webbrowser.get('chrome').open_new(url) time.sleep(5) return elif "write a note" in data: respond("What should i write, sir!") data = listen() file = open('note.txt', 'w') file.write(data) respond("noted successfully") return elif "execute" in data: execute_commands() return elif "upcoming events" in data or "scheduled events" in data or "events" in data: events = calendar_events() return elif "game" in data or "play" in data: try: tic_tac_toe() return except: return elif "create event" in data: create_event() return elif "speed test" in data: try: respond("sure! wait a second to measure") st = speedtest.Speedtest() server_names = [] st.get_servers(server_names) ping = st.results.ping downlink_Mbps = round(st.download() / 1000000, 2) uplink_Mbps = round(st.upload() / 1000000, 2) respond('ping {} ms'.format(ping)) respond("The uplink is {} Mbps".format(uplink_Mbps)) respond("The downlink is {}Mbps".format(downlink_Mbps)) return except: respond ("I couldn't run a speedtest") return elif "memory" in data: process_id = os.getpid() py = psutil.Process(process_id) memory_use = round(py.memory_info()[0]/2. **30, 2) respond("I use {} Gb of memory".format(memory_use)) return elif "internet connection" in data or "internet" in data: if internet_availability(): respond("Internet Connection is okay!") return elif 'email to' in data: try: respond("Sir, give me your message") print('Give message.......') content = takeCommand() to = "receiver email address" sendEmail(to, content) print('Sending mail........') respond("Email has been sent!") except Exception as e: print(e) respond("Sorry master . I am not able to send this email")
# from extract import extract_news_from_page, get_full_html_from_news, get_title, content_sportv, extract_text_from_news_link
from googlesearch import search

query = "globoesporte.globo.com/futebol/ /noticia/2016"
result = search(query, tld='com', lang='pt-br', num=10, start=0, stop=50, pause=2.0)

news_links = []
for r in result:
    if "/2016/" in r:
        news_links.append(r)

print(news_links)
wordcount = {}
for word in a.lower().split():
    word = word.replace(".", "")
    word = word.replace(",", "")
    word = word.replace(":", "")
    word = word.replace("\"", "")
    word = word.replace("!", "")
    word = word.replace("?", "")
    if word not in stopwords:
        if word not in wordcount:
            wordcount[word] = 1
        else:
            wordcount[word] += 1

# Print the most common words
n_print = int(input("How many key words to google?: "))
word_counter = collections.Counter(wordcount)

googleString = ""
for word, count in word_counter.most_common(n_print):
    googleString = googleString + " " + word

try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")

for j in search(googleString, tld="co.in", num=10, stop=5, pause=2):
    print(j)

file.close()
def main():
    running = True
    while running:
        # query = "U.S. president after Lincoln, almost impeached by the Radical Republicans quizlet"
        query = input("Whats the question? (Type 'q' to quit)\n\n")
        if query == "q":
            running = False
            break

        # Get all the urls
        urls = []
        # os.environ['HTTP_PROXY'] = 'http://172.16.0.3:8888'
        for j in search(query, tld="co.in", num=numSites, stop=numSites, pause=2):
            if ("quizlet" in j):
                n = findnth(j, "/", 3)
                jfinal = j[0:n + 1]
                urls.append(jfinal)
                # print(jfinal)

        # Check data from quizlets to see if theres relevant stuff
        pp = pprint.PrettyPrinter(indent=4)
        found = False
        for url in urls:
            data = get_data_from(url)  # Just doing the first one for now
            for pair in data:
                t = pair["term"]
                d = pair["definition"]
                keyword_prune = prune(query.lower())
                matchedT = re.findall(keyword_prune, t)
                matchedD = re.findall(keyword_prune, d)
                if len(matchedT) != 0 or len(matchedD) != 0:
                    found = True
                    print("\n\n -------------------------Found! (with prune)-------------------------\n\n")
                    pp.pprint(pair)
                    print("\n\n ---------------------------------------------------------------------\n\n")

            # Now do the exact same thing with the LNSW method
            if not found:
                for pair in data:
                    t = pair["term"]
                    d = pair["definition"]
                    keyword_lnsw = LNSW(query)
                    if keyword_lnsw == None:
                        print("None keyword_lnsw")
                        exit
                    matchedT = re.findall(keyword_lnsw, t)
                    matchedD = re.findall(keyword_lnsw, d)
                    if len(matchedT) != 0 or len(matchedD) != 0:
                        found = True
                        print("\n\n -------------------------Found! (with LNSW:",
                              keyword_lnsw, ")-------------------------\n\n")
                        pp.pprint(pair)
                        print("\n\n -------------------------------------------------------------\n\n")
                # print(matches)
                # pp.pprint(data)

        if not found:
            print("\nNothing found.\n")
import speech_recognition as sr
import time
import webbrowser

from googlesearch import search

f = open('url.txt', 'w')
print("press 1 for keyboard and Press 2 for voice search")
choice = input()

if choice == '1':
    web = input("Please enter here : ")
    print(web)
    webbrowser.open_new_tab("https://www.google.com/search?q=" + web)
    url = []
    for i in search(web, stop=10):
        print(i)
        time.sleep(3)
        f.write(i + '\n')
        url.append(i)
    print(url)
    f.close()
elif choice == '2':
    def recognize_speech_from_mic(recognizer, microphone):
        with microphone as source:
            audio = recognizer.listen(source)
        response = {
            "success": True,
            "error": None,
            "transcription": None,
        }
        try:
            response["transcription"] = recognizer.recognize_google(audio)
        except sr.RequestError:
def google_query(query):
    link = []
    for j in search(query, tld="ca", num=10, stop=10, pause=2):
        link.append(j)
    return link
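# A minimal retry sketch around google_query. Repeated automated queries are
# commonly throttled with HTTP 429 responses, so this backs off and retries a
# few times before giving up; the wrapper name, delay values, and max_retries
# are illustrative assumptions, not part of the original snippet.
import time
from urllib.error import HTTPError


def google_query_with_retry(query, max_retries=3, base_delay=60):
    for attempt in range(max_retries):
        try:
            return google_query(query)
        except HTTPError as err:
            if err.code != 429 or attempt == max_retries - 1:
                raise
            time.sleep(base_delay * (attempt + 1))  # linear backoff before retrying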
        elif latest < requested:
            print("That comic doesn't exist yet silly!")
        else:
            url = (f'{url}{comicNumber}')
            webbrowser.open_new(url)
    except:
        print("An error occurred. Perhaps you didn't enter an integer, or you don't have a recognized web browser, or dark magic interfered. The world may never know.")
        webbrowser.open_new("https://xkcd.com/2200/")

elif searchType == "phrase":
    pattern = r"^https?://xkcd.com/\d+/$"  # Matches the url for an xkcd, with or without https
    referencePhrase = input("What is the relevant phrase?")
    num_results = input("How many xkcds would you like to search for?")
    query = ("site:xkcd.com " + str(referencePhrase))
    try:
        i = int(num_results)
        for result in search(query, num=20):
            if i <= 0:
                break
            if re.match(pattern, result) is not None:  # Result matches pattern
                i -= 1
                webbrowser.open_new(result)
    except:
        print("An error occurred. Perhaps you didn't enter an integer for the number of searches, or you don't have google, or maybe it's all my fault and I really am a disappointment like my parents said.")
        webbrowser.open_new("https://xkcd.com/2200/")

elif searchType == "random":
    newest_comic = asyncio.run(latest_comic_num())
    comic_num = random.randint(1, newest_comic)
    random_url = (f"{url}{comic_num}")
    webbrowser.open_new(random_url)

else:
    webbrowser.open_new("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
from googlesearch import search

file = open('search.txt', 'r')
count = 0
while True:
    count += 1
    line = file.readline()
    if not line:
        break
    linee = line.strip()
    print(linee)
    # You can change the search domain by changing the tld value (co.in in this case).
    for url in search(linee, tld="co.in", num=10, pause=10):
        print(url)
if __name__ == '__main__':
    query = 'Response object above tell'
    for url in search(query, stop=3):
        a = google_scrape(url)
        print("url", url)
        print("Title: " + a)
        print(" ")
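# A minimal throttling sketch based on the advice quoted in the get_arxiv_urls
# docstring above (roughly 50-65 s between queries and a longer 180-240 s pause
# every 100 queries). The wrapper name and the way results are collected are
# assumptions for illustration only.
import random
import time

import googlesearch


def throttled_search(queries):
    results = {}
    for idx, query in enumerate(queries):
        if idx and idx % 100 == 0:
            time.sleep(random.randint(180, 240))  # long pause every 100 queries
        elif idx:
            time.sleep(random.randint(50, 65))    # short pause between queries
        results[query] = list(googlesearch.search(query, stop=1, pause=2))
    return results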
import webbrowser

try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")

# to search
query = "whatsapp"
for j in search(query, tld="co.in", num=10, stop=1, pause=2):
    webbrowser.open(j)
def extractFraseGoogle(frase, cantRes): print("Procurando emails...Aguarde") print("Esta operacao pode demorar varios minutos") try: listUrl = [] listEmails = [] for url in search(frase, stop=cantRes): listUrl.append(url) for i in listUrl: try: req = urllib.request.Request(i, data=None, headers={'User-Agent': ua.random}) try: conn = urllib.request.urlopen(req) except timeout: print("Bad Url..") time.sleep(2) pass except (HTTPError, URLError): print("Bad Url..") time.sleep(2) pass status = conn.getcode() contentType = conn.info().get_content_type() if (status != 200 or contentType == "audio/mpeg"): print("Bad Url..") time.sleep(2) pass html = conn.read() soup = BeautifulSoup(html, "lxml") links = soup.find_all('a') print("Serao analizados " + str(len(links) + 1) + " Urls...") time.sleep(2) for tag in links: link = tag.get('href', None) if link is not None: # Fix TimeOut searchSpecificLink(link, listEmails, frase) except urllib.error.URLError as e: print("Problems with the url:" + i) print(e) pass except (http.client.IncompleteRead) as e: print(e) pass except Exception as e: print(e) pass print("") print("*******") print("Finish") print("*******") input("Aperte return para continuar") menu() except KeyboardInterrupt: input("Aperte return para continuar") menu() except Exception as e: print(e) input("Aperte enter para continuar") menu()
speak("Here you go to Google") print("* J: Here you go to Google \n") webbrowser.open("google.com") elif 'open stackoverflow' in query: speak("Here you go to Stack Over flow.Happy coding") print("* J: Here you go to stackoverflow ") webbrowser.open("stackoverflow.com") elif "wikipedia" in query: print("* J: Here you go to wikipedia") webbrowser.open("wikipedia.com") elif 'search for' in query: x=re.search('search for',query) speak("Finding results") print("J: Finding results\n") query=query[x.start()+10:] for j in search(query,tld="co.in",num=1, stop=1): print(j) webbrowser.open(j) elif 'news' in query: speak('here are some top news from the times of india') print('J: Here are some top news from the times of india') webbrowser.open("https://timesofindia.indiatimes.com/india") # STORY elif 'tell me a story' in query: speak("Reading a story book") content=takeCommand() speakstory() # PLAY MOVIE elif 'play movie' in query: speak("Playing your playlist sir")
    i += 1
    return i


def vuln_test(url):
    try:
        response = urllib2.urlopen(url)
        soup = BeautifulSoup(response.read(), 'html.parser')
        url2 = url + "'"
        response2 = urllib2.urlopen(url2)
        soup2 = BeautifulSoup(response2.read(), 'html.parser')
        if soup != soup2:
            if word_err(soup2) > -1:
                print "\n[%s*%s]Possible SQL vulnerability => %s " % (green, norm, url2)
    except urllib2.HTTPError:
        pass
    except urllib2.URLError:
        pass


credit()
dork = raw_input("Enter a dork (e.g.: 'inurl:index.php?id=') => ")
try:
    url_list = search(dork)
    for url in url_list:
        vuln_test(url)
except urllib2.HTTPError:
    print "%sGoogle banned your IP%s (restart connection) " % (red, norm)
def generate_data_set(url): data_set = [] # Converts the given URL into standard format if not re.match(r"^https?", url): url = "http://" + url # Stores the response of the given URL try: response = requests.get(url) soup = BeautifulSoup(response.text, 'html.parser') except: response = "" soup = -999 # Extracts domain from the given URL domain = re.findall(r"://([^/]+)/?", url)[0] if re.match(r"^www.", domain): domain = domain.replace("www.", "") # Requests all the information about the domain whois_response = whois.whois(domain) rank_checker_response = requests.post( "https://www.checkpagerank.net/index.php", {"name": domain}) # Extracts global rank of the website try: global_rank = int( re.findall(r"Global Rank: ([0-9]+)", rank_checker_response.text)[0]) except: global_rank = -1 # 1.having_IP_Address try: ipaddress.ip_address(url) data_set.append(-1) except: data_set.append(1) # 2.URL_Length if len(url) < 54: data_set.append(1) elif len(url) >= 54 and len(url) <= 75: data_set.append(0) else: data_set.append(-1) # 3.Shortining_Service match = re.search( 'bit\.ly|goo\.gl|shorte\.st|go2l\.ink|x\.co|ow\.ly|t\.co|tinyurl|tr\.im|is\.gd|cli\.gs|' 'yfrog\.com|migre\.me|ff\.im|tiny\.cc|url4\.eu|twit\.ac|su\.pr|twurl\.nl|snipurl\.com|' 'short\.to|BudURL\.com|ping\.fm|post\.ly|Just\.as|bkite\.com|snipr\.com|fic\.kr|loopt\.us|' 'doiop\.com|short\.ie|kl\.am|wp\.me|rubyurl\.com|om\.ly|to\.ly|bit\.do|t\.co|lnkd\.in|' 'db\.tt|qr\.ae|adf\.ly|goo\.gl|bitly\.com|cur\.lv|tinyurl\.com|ow\.ly|bit\.ly|ity\.im|' 'q\.gs|is\.gd|po\.st|bc\.vc|twitthis\.com|u\.to|j\.mp|buzurl\.com|cutt\.us|u\.bb|yourls\.org|' 'x\.co|prettylinkpro\.com|scrnch\.me|filoops\.info|vzturl\.com|qr\.net|1url\.com|tweez\.me|v\.gd|tr\.im|link\.zip\.net', url) if match: data_set.append(-1) else: data_set.append(1) # 4.having_At_Symbol if re.findall("@", url): data_set.append(-1) else: data_set.append(1) # 5.double_slash_redirecting list = [x.start(0) for x in re.finditer('//', url)] if list[len(list) - 1] > 6: data_set.append(-1) else: data_set.append(1) # 6.Prefix_Suffix if re.findall(r"https?://[^\-]+-[^\-]+/", url): data_set.append(-1) else: data_set.append(1) # 7.having_Sub_Domain if len(re.findall("\.", url)) == 1: data_set.append(1) elif len(re.findall("\.", url)) == 2: data_set.append(0) else: data_set.append(-1) # 8.SSLfinal_State try: if response.text: data_set.append(1) except: data_set.append(-1) # 9.Domain_registeration_length expiration_date = whois_response.expiration_date registration_length = 0 try: expiration_date = min(expiration_date) today = time.strftime('%Y-%m-%d') today = datetime.strptime(today, '%Y-%m-%d') registration_length = abs((expiration_date - today).days) if registration_length / 365 <= 1: data_set.append(-1) else: data_set.append(1) except: data_set.append(-1) # 10.Favicon if soup == -999: data_set.append(-1) else: try: for head in soup.find_all('head'): for head.link in soup.find_all('link', href=True): dots = [ x.start(0) for x in re.finditer('\.', head.link['href']) ] if url in head.link['href'] or len( dots) == 1 or domain in head.link['href']: data_set.append(1) raise StopIteration else: data_set.append(-1) raise StopIteration except StopIteration: pass #11. port try: port = domain.split(":")[1] if port: data_set.append(-1) else: data_set.append(1) except: data_set.append(1) #12. HTTPS_token if re.findall(r"^https://", url): data_set.append(1) else: data_set.append(-1) #13. 
Request_URL i = 0 success = 0 if soup == -999: data_set.append(-1) else: for img in soup.find_all('img', src=True): dots = [x.start(0) for x in re.finditer('\.', img['src'])] if url in img['src'] or domain in img['src'] or len(dots) == 1: success = success + 1 i = i + 1 for audio in soup.find_all('audio', src=True): dots = [x.start(0) for x in re.finditer('\.', audio['src'])] if url in audio['src'] or domain in audio['src'] or len(dots) == 1: success = success + 1 i = i + 1 for embed in soup.find_all('embed', src=True): dots = [x.start(0) for x in re.finditer('\.', embed['src'])] if url in embed['src'] or domain in embed['src'] or len(dots) == 1: success = success + 1 i = i + 1 for iframe in soup.find_all('iframe', src=True): dots = [x.start(0) for x in re.finditer('\.', iframe['src'])] if url in iframe['src'] or domain in iframe['src'] or len( dots) == 1: success = success + 1 i = i + 1 try: percentage = success / float(i) * 100 if percentage < 22.0: dataset.append(1) elif ((percentage >= 22.0) and (percentage < 61.0)): data_set.append(0) else: data_set.append(-1) except: data_set.append(1) #14. URL_of_Anchor percentage = 0 i = 0 unsafe = 0 if soup == -999: data_set.append(-1) else: for a in soup.find_all('a', href=True): # 2nd condition was 'JavaScript ::void(0)' but we put JavaScript because the space between javascript and :: might not be # there in the actual a['href'] if "#" in a['href'] or "javascript" in a['href'].lower( ) or "mailto" in a['href'].lower() or not (url in a['href'] or domain in a['href']): unsafe = unsafe + 1 i = i + 1 try: percentage = unsafe / float(i) * 100 except: data_set.append(1) if percentage < 31.0: data_set.append(1) elif ((percentage >= 31.0) and (percentage < 67.0)): data_set.append(0) else: data_set.append(-1) #15. Links_in_tags i = 0 success = 0 if soup == -999: data_set.append(-1) else: for link in soup.find_all('link', href=True): dots = [x.start(0) for x in re.finditer('\.', link['href'])] if url in link['href'] or domain in link['href'] or len(dots) == 1: success = success + 1 i = i + 1 for script in soup.find_all('script', src=True): dots = [x.start(0) for x in re.finditer('\.', script['src'])] if url in script['src'] or domain in script['src'] or len( dots) == 1: success = success + 1 i = i + 1 try: percentage = success / float(i) * 100 except: data_set.append(1) if percentage < 17.0: data_set.append(1) elif ((percentage >= 17.0) and (percentage < 81.0)): data_set.append(0) else: data_set.append(-1) #16. SFH for form in soup.find_all('form', action=True): if form['action'] == "" or form['action'] == "about:blank": data_set.append(-1) break elif url not in form['action'] and domain not in form['action']: data_set.append(0) break else: data_set.append(1) break #17. Submitting_to_email if response == "": data_set.append(-1) else: if re.findall(r"[mail\(\)|mailto:?]", response.text): data_set.append(1) else: data_set.append(-1) #18. Abnormal_URL if response == "": data_set.append(-1) else: if response.text == "": data_set.append(1) else: data_set.append(-1) #19. Redirect if response == "": data_set.append(-1) else: if len(response.history) <= 1: data_set.append(-1) elif len(response.history) <= 4: data_set.append(0) else: data_set.append(1) #20. on_mouseover if response == "": data_set.append(-1) else: if re.findall("<script>.+onmouseover.+</script>", response.text): data_set.append(1) else: data_set.append(-1) #21. 
RightClick if response == "": data_set.append(-1) else: if re.findall(r"event.button ?== ?2", response.text): data_set.append(1) else: data_set.append(-1) #22. popUpWidnow if response == "": data_set.append(-1) else: if re.findall(r"alert\(", response.text): data_set.append(1) else: data_set.append(-1) #23. Iframe if response == "": data_set.append(-1) else: if re.findall(r"[<iframe>|<frameBorder>]", response.text): data_set.append(1) else: data_set.append(-1) #24. age_of_domain if response == "": data_set.append(-1) else: try: registration_date = re.findall( r'Registration Date:</div><div class="df-value">([^<]+)</div>', whois_response.text)[0] if diff_month(date.today(), date_parse(registration_date)) >= 6: data_set.append(-1) else: data_set.append(1) except: data_set.append(1) #25. DNSRecord dns = 1 try: d = whois.whois(domain) except: dns = -1 if dns == -1: data_set.append(-1) else: if registration_length / 365 <= 1: data_set.append(-1) else: data_set.append(1) #26. web_traffic try: rank = BeautifulSoup( urllib.request.urlopen( "http://data.alexa.com/data?cli=10&dat=s&url=" + url).read(), "xml").find("REACH")['RANK'] rank = int(rank) if (rank < 100000): data_set.append(1) else: data_set.append(0) except TypeError: data_set.append(-1) #27. Page_Rank try: if global_rank > 0 and global_rank < 100000: data_set.append(-1) else: data_set.append(1) except: data_set.append(1) #28. Google_Index site = search(url, 5) if site: data_set.append(1) else: data_set.append(-1) #29. Links_pointing_to_page if response == "": data_set.append(-1) else: number_of_links = len(re.findall(r"<a href=", response.text)) if number_of_links == 0: data_set.append(1) elif number_of_links <= 2: data_set.append(0) else: data_set.append(-1) #30. Statistical_report url_match = re.search( 'at\.ua|usa\.cc|baltazarpresentes\.com\.br|pe\.hu|esy\.es|hol\.es|sweddy\.com|myjino\.ru|96\.lt|ow\.ly', url) try: ip_address = socket.gethostbyname(domain) ip_match = re.search( '146\.112\.61\.108|213\.174\.157\.151|121\.50\.168\.88|192\.185\.217\.116|78\.46\.211\.158|181\.174\.165\.13|46\.242\.145\.103|121\.50\.168\.40|83\.125\.22\.219|46\.242\.145\.98|' '107\.151\.148\.44|107\.151\.148\.107|64\.70\.19\.203|199\.184\.144\.27|107\.151\.148\.108|107\.151\.148\.109|119\.28\.52\.61|54\.83\.43\.69|52\.69\.166\.231|216\.58\.192\.225|' '118\.184\.25\.86|67\.208\.74\.71|23\.253\.126\.58|104\.239\.157\.210|175\.126\.123\.219|141\.8\.224\.221|10\.10\.10\.10|43\.229\.108\.32|103\.232\.215\.140|69\.172\.201\.153|' '216\.218\.185\.162|54\.225\.104\.146|103\.243\.24\.98|199\.59\.243\.120|31\.170\.160\.61|213\.19\.128\.77|62\.113\.226\.131|208\.100\.26\.234|195\.16\.127\.102|195\.16\.127\.157|' '34\.196\.13\.28|103\.224\.212\.222|172\.217\.4\.225|54\.72\.9\.51|192\.64\.147\.141|198\.200\.56\.183|23\.253\.164\.103|52\.48\.191\.26|52\.214\.197\.72|87\.98\.255\.18|209\.99\.17\.27|' '216\.38\.62\.18|104\.130\.124\.96|47\.89\.58\.141|78\.46\.211\.158|54\.86\.225\.156|54\.82\.156\.19|37\.157\.192\.102|204\.11\.56\.48|110\.34\.231\.42', ip_address) if url_match: data_set.append(-1) elif ip_match: data_set.append(-1) else: data_set.append(1) except: print('Connection problem. Please check your internet connection!') print(data_set) return data_set
#! /usr/bin/python3
import sys

from googlesearch import search

query = sys.argv[1]
for i in search(query, tld="co.in", num=5, start=0, stop=5):
    print(i)
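# A slightly more defensive variant of the script above (a sketch only: the
# argparse interface and option names are assumptions, not part of the original).
# It validates that a query was supplied and lets the caller pick the result count.
import argparse

from googlesearch import search


def main():
    parser = argparse.ArgumentParser(description="Print Google results for a query")
    parser.add_argument("query", help="search terms")
    parser.add_argument("-n", "--num", type=int, default=5, help="number of results")
    args = parser.parse_args()
    for link in search(args.query, tld="co.in", num=args.num, start=0, stop=args.num):
        print(link)


if __name__ == "__main__":
    main()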
byewishing = [
    'bye bye', 'Have a good day', 'I am happy to help you',
    'see you again later', 'see you next time!farewell'
]

while True:
    # query = takecommand().lower()
    query = input()
    checkgreet = query.split(" ")
    if any(item in checkgreet for item in (grettings or wishes)):
        greet = random.choice(tuple(grettings))
        speak(greet)
    elif 'search' in query:
        query = query.replace("search", "")
        if 'google' in query:
            query = query.replace('google', '')
            for link in search(query, num_results=5, lang="en-in"):
                webbrowser.open_new_tab(link)
        elif 'ganna' in query:
            speak('Searching on gaana')
            query = query.replace('bye', '')
            query = query.replace('play', '')
            query = query.replace('song', '')
            query = query.replace('on', '')
            query = query.replace('in', '')
            query = query.replace('ganna', '')
            play_audio(query)
        elif 'youtube' in query or 'you tube' in query:
            speak('searching on youtube')
            query = query.replace("youtube", "")
            query = query.replace('on', '')
            query = query.replace('in', '')