Example #1
def do_query(query, print_flag=False, *, n_result=10):
    try:
        from googlesearch import search
    except ImportError:
        print("No module named 'google' found")

    # to search
    #query = "Geeksforgeeks"

    for j in search(query, tld="com", num=n_result, stop=n_result, pause=2):
        if print_flag:
            print(j)
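
A minimal usage sketch (the query string is illustrative; assumes the legacy `googlesearch` package that accepts `tld`/`num`/`stop` is installed). Note that `n_result` is keyword-only because of the bare `*` in the signature:

# hypothetical call: print up to ten result URLs for the query
do_query("python generators tutorial", print_flag=True, n_result=10)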
Example #2
                speak("According to Wikipedia")
                speak(results)

            except Exception as e:
                print(e)
                sys.stdout.flush()
                speak(e)

        elif 'search google' in query:
            speak("What do you want me to search?")
            print("Listening.....")
            sys.stdout.flush()
            gs = takeCommand().lower()

            speak("Searching Google...")
            arr = list(search(gs, tld='co.in', lang='en',
                              num=5, start=0, stop=5, pause=1.2))
            clone_arr = [f"{i}. {url}" for i, url in enumerate(arr, start=1)]

            google_outs = "<br>".join(clone_arr)
            print(google_outs + "<br>" * 2 + "Listening....")

            sys.stdout.flush()

            # time.sleep(1.5)

            speak("Do you want me to open any of these??")
Example #3
def get_arxiv_urls(paper_titles, output_json_dir=None):
    """Get a list of arxiv urls from paper_titles

    One way to do it is to install googler and use it.
    ```
    for filename in files[:1]:
        os.system('googler -n 1 "{}" arxiv.org filetype:pdf'.format(filename))
    ```

    Note that google search has a query limit. Per instructions here:
    https://github.com/abenassi/Google-Search-API/blob/master/google/modules/utils.py#L81
    >>  You may also wanna wait some time between queries, say, randint(50,65)
        between each query, and randint(180,240) every 100 queries, which is
        what I found useful.

    Here we use python lib googlesearch for portability

    TODO: try https://github.com/abenassi/Google-Search-API to avoid double query
    """
    urls = []
    for idx_query, paper_title in tqdm(list(enumerate(paper_titles))):
        success = False
        num_trial = 0
        sleep_in_seconds = 0
        query_results = []
        query = '{} arxiv.org'.format(paper_title)
        print(query)
        # try num_trial times with sleep_in_seconds intervals before moving on to next search
        while not success and num_trial < 2:
            time.sleep(sleep_in_seconds)
            try:
                # convert returned iterator to list to catch the error here
                query_results = list(
                    googlesearch.search(query, stop=1, pause=2))
            except Exception:
                # count the failed attempt, then back off before retrying
                num_trial += 1
                sleep_in_seconds = random.randint(180, 240) * num_trial
                print('Warning: sleep and retry in {} seconds'.format(
                    sleep_in_seconds))
                continue
            success = True

        for url in query_results:
            if 'arxiv.org/pdf' in url:
                # cannot scrape the title of a web page containing a pdf file
                continue
            # sometimes google returns a numeric IP instead of the arxiv.org host.
            url = 'https://arxiv.org/' + '/'.join(
                url.replace('https://', '').split('/')[-2:])
            try:
                url_title = google_scrape(url)
            except Exception:
                url_title = ''
            print('url ', url)
            print('url title ', url_title)
            urls.append({
                'url_title': url_title,
                'url': url,
                'query': paper_title,
            })
    if output_json_dir and os.path.isdir(output_json_dir):
        with open(os.path.join(output_json_dir, 'query_urls.json'),
                  'a') as f_out:
            json.dump(urls, f_out, indent=4, sort_keys=True)
    return urls
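
Following the pacing advice quoted in the docstring, here is a hedged sketch of a paced query loop; `run_query` is a hypothetical callable standing in for whatever issues the search, and the sleep bounds are the ones from the quote:

import random
import time

def paced_queries(queries, run_query):
    # sketch: randint(50, 65) s between queries, plus a longer
    # randint(180, 240) s break every 100 queries, per the quote above
    for idx, q in enumerate(queries, start=1):
        run_query(q)
        time.sleep(random.randint(50, 65))
        if idx % 100 == 0:
            time.sleep(random.randint(180, 240))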
Example #4
def searching(item):
    # collect results locally instead of relying on an undefined global
    search_list = []
    for result in search(item, num_results=5):
        search_list.append(result)
    return search_list
Example #5
import webbrowser
from googlesearch import search
import pyqrcode
from pyqrcode import QRCode

f = open("glinks", 'w')
web = input("ENTER TOPIC TO SEARCH ON GOOGLE :")
url_list = []
x = 1
for i in search(web, tld="co.in", lang="eng", num=3, start=0, stop=3, pause=2):
    url_list.append(i)
    print(i)
    url = pyqrcode.create(i)
    webbrowser.open(i)
    url.svg("qrcode" + str(x) + ".svg", scale=6)
    x = x + 1
Example #6
    cursor = connection.cursor()
    cursor.execute(sql_select_Query)
    records = cursor.fetchall()
    print("Number of games: ", cursor.rowcount)

    print("\nFor each game now:")
    # row[0] = id, row[1] = name
    for row in records:
        games[row[0]] = row[1]

    # searching
    for game_id in games:
        query = "Buy " + games[game_id] + " digital copy"
        links = []
        domains = []
        for url in search(query, tld="co.in", num=8, stop=8, pause=2):
            domain = url.split("//")[-1].split("/")[0]
            if domain not in domains:
                domains.append(domain)
                links.append(url)
            if len(links) == 3:
                break

        for i in range(0, len(links)):
            try:
                sql_insert_Query = "insert into store_links (id_game, domain_name, link) values (%s, %s, %s)"
                val = (game_id, domains[i], links[i])
                cursor2 = connection.cursor()
                cursor2.execute(sql_insert_Query, val)
                print("inserted ", games[game_id])
            except Error as e:
Example #7
def scrape_company(company,
                   max_depth=MAX_DEPTH,
                   max_entry_links=MAX_ENTRY_LINKS,
                   max_total_links=MAX_TOTAL_LINKS):
    # Open up Selenium web browser
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--incognito')
    options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/70.0.3538.77 Safari/537.36')
    driver = webdriver.Chrome(options=options)

    # Find company website, abbreviation
    for url in search(company + ' careers website', stop=1):
        company_website = urlparse(url).netloc
    company_abbrev = company.lower().replace(' ', '').replace('.', '')

    print('Company is {0}.'.format(company))
    print('Company careers website is {0}.'.format(company_website))

    # Initially, the frontier is populated by a Google search
    # These links must originate at the company website found earlier
    frontier = PriorityQueue()
    visited = set()
    search_term = company + ' internship apply'
    for entry_link in search(search_term, stop=max_entry_links):
        if urlparse(entry_link).netloc != company_website:
            continue
        entry_link = normalize_link(entry_link)
        heuristic = score_link_heuristic(entry_link, company)
        frontier.put((-heuristic, (entry_link, 1)))
        visited.add(entry_link)

    # This method allows us to vet & add child links to the frontier
    def explore_child_links(links):
        for link in links:
            # Filter out obviously bad links
            if not link or len(link) <= 2 or link[0] == '#' or \
                    'javascript:void' in link or link.startswith('mailto'):
                continue

            # Fix relative links, trim trailing slashes, etc
            link = normalize_link(link,
                                  parent=current,
                                  parent_base=current_base)

            # Links must either be tied to the company or
            # an external job application website
            domain = urlparse(link).netloc
            if domain != company_website and \
                    company_abbrev not in domain and \
                    'taleo' not in domain and \
                    'workday' not in domain and \
                    'greenhouse' not in domain and \
                    'jobvite' not in domain and \
                    'icims' not in domain:
                continue

            # PDF or image links should not be followed
            if link.endswith('.pdf') or link.endswith('.jpg') or \
                    link.endswith('.png'):
                continue

            # Skip links that have already been added to the frontier
            if link in visited:
                continue

            heuristic = score_link_heuristic(link, company_abbrev)
            frontier.put((-heuristic, (link, depth + 1)))
            visited.add(link)

    results = []
    link_idx = 0

    while not frontier.empty() and link_idx < max_total_links:
        # Extract current link we are on and the link's root (excludes path)
        neg_heuristic, (current, depth) = frontier.get()
        current_parse = urlparse(current)
        current_loc = current_parse.netloc
        current_base = current_parse.scheme + '://' + current_loc

        # TODO: Convert from printing to logging
        print('Visiting ... {0} (depth={1}, lh={2})'.format(
            current, depth, -neg_heuristic))

        # Use Selenium to fetch our page, wait a bit for the page to load
        driver.get(current)
        time.sleep(2)
        content = driver.page_source

        # Determine whether a page is explorable without doing
        # any HTML parsing by doing some primitive checks
        lcontent = content.lower()
        explorable = False
        for keyword in ['job', 'career', 'intern']:
            if keyword in lcontent:
                explorable = True
                break
        if not explorable:
            continue

        link_idx += 1

        # Parse HTML using BS4, discard links in header and footer
        soup = BeautifulSoup(content, 'lxml')
        if soup.header:
            soup.header.decompose()
        if soup.footer:
            soup.footer.decompose()

        # Assign score to page based off of BS4 parse
        page_score = score_page(soup, company_abbrev)
        iframes = driver.find_elements_by_tag_name('iframe')
        for iframe in iframes:
            driver.switch_to.frame(iframe)
            isoup = BeautifulSoup(driver.page_source, 'lxml')
            if isoup.header:
                isoup.header.decompose()
            if isoup.footer:
                isoup.footer.decompose()
            page_score = max(page_score, score_page(isoup, company_abbrev))
        driver.switch_to.default_content()
        if page_score > 0:
            results.append((current, page_score))

        # Child exploration cannot exceed the given maximum depth
        if depth < max_depth:
            # Collect links from anchor tags
            explore_child_links(
                [a.get('href') for a in soup.find_all('a', href=True)])

            # Collect links from each iframe separately
            for iframe in iframes:
                driver.switch_to.frame(iframe)
                isoup = BeautifulSoup(driver.page_source, 'lxml')
                if isoup.header:
                    isoup.header.decompose()
                if isoup.footer:
                    isoup.footer.decompose()
                explore_child_links(
                    [a.get('href') for a in isoup.find_all('a', href=True)])

    # Close the browser instance that Selenium opened
    driver.close()

    # Find all result links that have the maximum score
    if results:
        max_score = max([score for _, score in results])
        max_links = [link for link, score in results if score == max_score]
        return (max_links, max_score)
    else:
        return ([], 0)
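
A hedged usage sketch; the company name is illustrative, and the MAX_* defaults come from module-level constants in the original:

# hypothetical call: crawl for the company's internship application pages
links, score = scrape_company('Example Corp', max_depth=2)
print('best score:', score)
for link in links:
    print(link)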
Example #8
selection = int(
    input(
        "\nPlease select a dork:\n[1] cat\n[2] id\n[3] article\n[4] page\n[5] bookid\n[6] Custom dork\n"
    ))
if selection == 6:
    dork = input(
        "\nPlease enter the dork (ex: for ?id= you would just enter id)\n")
else:
    dork = dorkList[selection - 1]
finalDork = "inurl:?" + dork + "="
searchAmount = int(input("\nPlease enter an amount of links to test:\n"))
print("\nSearching and testing...\n")
for x in search(finalDork,
                tld='com',
                lang='en',
                num=searchAmount,
                start=0,
                stop=searchAmount,
                pause=2.0):
    url = x + "'"
    res = requests.get(url)
    html_page = res.content
    soup = BeautifulSoup(html_page, 'html.parser')
    text = soup.find_all(text=True)
    for y in text:
        if y.find("You have an error in your SQL") != -1:
            foundUrls.append(x)
            print("\nVulnerable site found:", x)
file = open("output.txt", "a")
for x in foundUrls:
    file.write(x + "\n")
Example #9
    
elif press == 5:
    os.system("date +'%F %T'")
    
elif press == 6:
    print("are you sure ? y/n")
    access=input()
    if access == "y":
        os.system("reboot")
    elif access == "n":
        print("Process aborted")
    else:
        print("wrong keyword pressed")
elif press == 7:
    name=input("Enter your search keyword : ")
    os.system('firefox --search {}'.format(name))
elif press == 8:
    # Install mpg123 packages. (yum install -y mpg123)
    file = "/root/Downloads/hindi.mp3"
    os.system("mpg123 " + file)

elif press == 9:
    # Install with 'pip3 install google'.
    try:
        from googlesearch import search
    except ImportError:
        print("No module named 'google' found")
    keyword = input("Enter your keyword to search : ")
    for name in search(keyword, tld="com", num=5, stop=5, pause=2):
        print(name)
Example #10
    def runvisuals(self):
        #Have to make user_id set in the sessions

        #GOOGLE APIS
        try:
            from googlesearch import search
        except ImportError:
            print("We could not find it at all!")
        query = "Instagram"
        for i in search(query, tld="co.in", num=10, stop=10, pause=2):
            print(i)
        self.winfo_toplevel().title("Perseus 1.1.0")
        #self.parent.geometry('2000x900')
        self.parent.configure(background="grey")
        #sideFrame = Frame(self.parent,height=1500,width=100, relief=SUNKEN)
        #sideFrame.grid(row=0,column=0, sticky='w')
        newFrame = Frame(self.parent, height=1500,width=150, relief=SUNKEN)
        newFrame.grid(row=0,column=1, sticky='ne',padx=(0,10))

        e = Entry(newFrame, width=150)
        e.grid(row=0,column=0,padx=(0,10))

        def Search():
            try:
                from googlesearch import search
            except ImportError:
                print("We could not find it at all!")
            query = e.get()
            r = 2
            for i in search(query, tld="co.in", num=10, stop=10, pause=2):
                LabelResult = Label(newFrame, text=i)
                LabelResult.grid(row=r, column=0, sticky='nw', padx=(0,10))
                # bind the current url as a default argument so each button
                # saves its own link (a bare lambda would capture the last i)
                ButtonOptions = Button(newFrame, text="Save",
                                       command=lambda url=i: SaveLink(e.get(), url, 1))
                ButtonOptions.grid(row=r, column=2, sticky='nw', padx=(0,10))
                r = r + 1


        searchBtn = Button(newFrame,text="Search", command=Search)
        searchBtn.grid(row=1,column=0, sticky='nw',padx=(0,10))

        #newFrame.grid(row=2,column=0)
        #yscrolbar.configure(command=newSpace.yview)
        #xscrollbar.configure(command=newSpace.xview)

        """ Making the Side window with File Tree
>>>>>>> 73be496... Finish
        p = ttk.Panedwindow(sideFrame, orient=VERTICAL)
        # first pane, which would get widgets gridded into it:
        fp = ttk.Labelframe(p, text='File Management', width=200, height=720)
        p.add(fp)
        p.pack(fill=BOTH, expand=1)
        #Tab view
        n = ttk.Notebook(fp)
        f1 = ttk.Frame(n, width=200, height=720)   # first page, which would get widgets gridded into it
<<<<<<< HEAD
        f2 = ttk.Frame(n, width=200, height=720)
        # second page
        n.add(f1, text='Local Files')
        n.add(f2, text='Online Files')

        n.pack(fill="both", expand=1)

        #Toolbar view
        #toolbar = Frame(frame, style='My.TFrame', height=720, width=25, relief=SUNKEN)
        #canvasToolbar = Canvas(toolbar, bg='blue',height=720, width=20)
        #toolbar.pack(side=RIGHT, fill=BOTH, expand=1)
        #toolbar.place(relx=1,rely=0,anchor=NE)


        #buttons on the toolbar
        """for i in range(15):
            button = Button(toolbar, height=1)
            button.pack(side=TOP, fill=BOTH, expand=1)
        """

        """Label"""
        ttk.Label(f1, text="Hierachical Treeview").pack()
        """Treeview"""
        treeview=ttk.Treeview(f1)
        treeview.pack()
        """Treeview items"""
=======

        # second page
        n.add(f1, text='Saved Files')

        n.pack(fill="both", expand=1)
        #Toolbar view
        toolbar = Frame(newFrame, style='My.TFrame', height=720, width=25, relief=SUNKEN)
        canvasToolbar = Canvas(toolbar, bg='blue',height=720, width=20)
        toolbar.pack(side=RIGHT, fill=BOTH, expand=1)
        toolbar.place(relx=1,rely=0,anchor=NE)"




        #buttons on the toolbar
        #options = ["Search", "Save", "Delete"]
        #for i in options:
        #    button = Button(toolbar, text=i)
        #    button.pack(side=TOP, fill=BOTH, expand=1)"""

        """#Label
        ttk.Label(f1, text="Hierachical Treeview").pack()
        #Treeview
        treeview=ttk.Treeview(f1)
        treeview.pack()
        #Treeview items
>>>>>>> 73be496... Finish
        treeview.insert('','0','item1',text='Parent tree')
        treeview.insert('','1','item2',text='1st Child')
        treeview.insert('','end','item3',text='2nd Child')
        treeview.insert('item2','end','A',text='A')
        treeview.insert('item2','end','B',text='B')
        treeview.insert('item2','end','C',text='C')
        treeview.insert('item3','end','D',text='D')
        treeview.insert('item3','end','E',text='E')
        treeview.insert('item3','end','F',text='F')
        treeview.move('item2','item1','end')
<<<<<<< HEAD
        treeview.move('item3','item1','end')
        """Making the compression Rate Label """
        #compressionRateLabel = Label(frame, text="Compression Rate : ",   height=1, width=1648, bd=1, bg="grey")
        #compressionRateLabel.place(relx=0, rely= 1, anchor=S)
        #TestShape = Shapes(frame)
        #TestShape.circle()
        """ Making the menu bar for the application"""
        menubar = Menu(self.parent)
        """Making the file tree """
        localFileTree = Treeview(f1)
=======
        treeview.move('item3','item1','end')"""



        """Making the compression Rate Label """

        #ompressionRateLabel = Label(Shapes.frame, text="Compression Rate : ", width=1648, bd=1, bg="grey")
        #compressionRateLabel.place(relx=0, rely= 1, anchor=S)
        #TestShape = Shapes(newFrame)
        #TestShape.circle(10,10,100,100,"red",)
        """ Making the menu bar for the application"""
        menubar = Menu(self.parent)
        """Making the file tree """
Example #11
#!/usr/bin/python

import webbrowser
from googlesearch import search

# take the search input
web = input('Enter what to search: ')
url = []

#Now to search

for each_search in search(web, stop=5):
    url.append(each_search)
    print(each_search)  # print each result link
    webbrowser.open_new_tab(each_search)
    for each in search(each_search, stop=5):  # search again using each of the above 5 links
        print(each)  # print each result link
        webbrowser.open_new_tab(each)
Example #12
def get_urls(tag, n, language):
    urls = list(search(tag, stop=n, lang=language))[:n]
    return urls
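
A short usage sketch (the query string is illustrative):

# hypothetical call: first five English results for a query
urls = get_urls("open source licensing", 5, "en")
print(urls)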
Example #13
from googlesearch import search
import pyqrcode
from pyqrcode import QRCode

urlinput = input("enter text to search: ")
urllist = []
for idx, i in enumerate(search(urlinput, tld='com', lang='en', num=10, start=0, stop=5, pause=2)):
	urllist.append(i)
	print(i)
	# one QR code per result; a numbered filename avoids overwriting
	url = pyqrcode.create(i)
	url.svg(str(idx) + ".svg", scale=8)
	print(url.terminal())
Example #14
# ****
def spinning_cursor():
    while True:
        for cursor in '|/-\\':
            yield cursor


spinner = spinning_cursor()
for _ in range(100):
    sys.stdout.write(next(spinner))
    sys.stdout.flush()
    time.sleep(0.1)
    sys.stdout.write('\b')
#*****
for gamma in search(query, tld=beta, stop=50, num=10, pause=2):
    print(colored('[+] Found > ', 'yellow') + gamma)
print(colored('[+] 20% done ', 'green'))
B = """ inurl:dtm.html intitle:1747-L551 """
query = B


# ****
def spinning_cursor():
    while True:
        for cursor in '|/-\\':
            yield cursor


spinner = spinning_cursor()
for _ in range(100):
Example #15
def extractFraseGoogle(frase, cantRes):
	print ("Searching emails... please wait")
	print ("This operation may take several minutes")
	try:
		listUrl = []
		count = 0

		for url in search(frase, stop=cantRes):
			listUrl.append(url)

		for i in listUrl:
			try:
				req = urllib.request.Request(
							i, 
							data=None, 
							headers={
							'User-Agent': ua.random
							})
				try:
					conn = urllib.request.urlopen(req)
				except timeout:
					print("Bad Url..")
					time.sleep(2)
					continue
				except (HTTPError, URLError):
					print("Bad Url..")
					time.sleep(2)
					continue

				status = conn.getcode()
				contentType = conn.info().get_content_type()

				if status != 200 or contentType == "audio/mpeg":
					print("Bad Url..")
					time.sleep(2)
					continue

				html = conn.read()

				soup = BeautifulSoup(html, "lxml")
				links = soup.find_all('a')

				print("They will be analyzed " + str(len(links) + 1) + " Urls..." )
				time.sleep(2)

				for tag in links:
					link = tag.get('href', None)
					if link is not None:
						try:
							print ("Searching in " + link)
							if link.startswith('http'):
								req = urllib.request.Request(
										link, 
										data=None, 
										headers={
										'User-Agent': ua.random
										})	
								try:
									f = urllib.request.urlopen(req)
								except timeout:
									print("Bad Url..")
									time.sleep(2)
									continue
								except (HTTPError, URLError):
									print("Bad Url..")
									time.sleep(2)
									continue

								status = f.getcode()
								contentType = f.info().get_content_type()

								if status != 200 or contentType == "audio/mpeg":
									print("Bad Url..")
									time.sleep(2)
									continue

								s = f.read().decode('utf-8')
								emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s)
								for email in emails:
									if (email not in listUrl and email[-3:] not in imageExt):
										count += 1
										print(str(count) + " - " + email)										
										listUrl.append(email)
										if (searchEmail("Emails.db", email, frase) == 0):
											insertEmail("Emails.db", email, frase, link)
											
						# continue on any error
						except Exception:
							pass
		
				print(str(count) + " emails were found")

			except urllib.error.URLError as e:
				print("Problems with the url:" + i)
				print(e)
				pass

	except KeyboardInterrupt:
		input("Press return to continue")
		menu()

	except Exception as e:
		print(e)
		input("Press enter to continue")
		menu()
Example #16
print("Enter Your Choice : ")
ch=int(input("1)TEXT\t2)VOICE"))

if ch==2:
	r=sr.Recognizer()

	with sr.Microphone() as source:
		print("Listening....")
		data=r.listen(source)
		print("Searching...")
		try:
			query=r.recognize_google(data)
		except:
			pass
elif ch==1:
	query=input("Type the Query: ")

else :
	print("Invalid")
	exit()

url=[] 

for i in search(query,stop=10): 
	print(i) 
	url.append(i) 

print(url)
Example #17
def start():



    time.sleep(2)
    print("what is your name sir?  ")
    os.startfile("wiyn.mp3")
    time.sleep(1.3)
    with mic as source:
        try:
            print("---listening---")
            r.adjust_for_ambient_noise(source)
            audio=r.listen(source)
            print("recognising...")
            name=r.recognize_google(audio)
            print(name)
        except:
            name=("Sir")

    greet= gTTS(text=name,lang="hi")
    greet.save("name.mp3")
    os.startfile("hello.mp3")
    os.startfile("hello.mp3")
    time.sleep(1.1)
    os.startfile("name.mp3")
    time.sleep(2.0)
    text=("I am Mouli. your virtual assistant, say ' hey molly ', if you need any help ")
    intro=gTTS(text=text,lang="en")
    intro.save("text.mp3")
    os.startfile("text.mp3")
    time.sleep(4.3)




    
    while True:
        bkchd=-5
        with mic as source:
            try:
                print("listening")
                r.adjust_for_ambient_noise(source)
                audio=r.listen(source)
                print("recognising")
                openw=r.recognize_google(audio)
                if (openw=="Hemali" or openw=="hey only" or openw=="hey money" or openw=="hey Molly"):
                    os.startfile("beep.mp3")
                    print("verified")
                    bkchd=5
                
            
            except:
                a=("hye")
        if (bkchd>0):
            hcihy=gTTS(text="how can i help you?",lang="en")
            hcihy.save("hcihy.mp3")
            os.startfile("hcihy.mp3")
            time.sleep(2)
            with mic as source:
                try:
                    print("---listening---")
                    r.adjust_for_ambient_noise(source)
                    audio=r.listen(source)
                    print("recognising...")
                    task=r.recognize_google(audio)
                    print(task)
                except:
                    print("could not recognise")
                    task=("hi Molly")
                    bkchd=-5
            j = search(task,num=1,tld="com",lang="en",stop=2,pause=1,start=1)
            for i in j:
                website=i
            w1=task.find("remind")
            w2=task.find("play")
            w3=task.find("open")
            w4=task.find("music")
            w5=task.find("stop listening")
            w6=task.find("shutdown")
            w7=task.find("joke")
            w8=task.find("please molly")
            w9=task.find("hi Molly")
            if (w2>=0):
                os.startfile("suresir.mp3")
                time.sleep(3.5)
                ntask=(task)
                n1=ntask.replace("on YouTube","")
                n1=n1.replace("on gaana","")
                n2=n1.replace("play","")
                n3=n2.replace("video","")
                n0=n3.replace("song","")
                # strip any digits and timing words from the phrase
                n5 = ''.join(ch for ch in n0 if not ch.isdigit())
                n4 = n5.replace("and stop listening for", "")
                n4 = n4.replace("minutes", "")
                n4 = n4.replace("minute", "")
                n4 = n4.replace("seconds", "")
                n4 = n4.replace("second", "")
                
                


                w10=task.find("movie")
                w11=task.find("song")
                if (w11>=0):
                    w13=task.find("gaana")
                    if (w13>=0):
                        webbrowser.open("www.gaana.com")
                        time.sleep(4)
                        keyboard.press(Key.space)
                        keyboard.release(Key.space)
                        bkchd=-5
                    else:
                        y=1
                        webbrowser.open("www.youtube.com")
                        time.sleep(5)
                        keyboard.press(Key.tab)
                        keyboard.release(Key.tab)
                        keyboard.press(Key.tab)
                        keyboard.release(Key.tab)
                        keyboard.press(Key.tab)
                        keyboard.release(Key.tab)
                        keyboard.type(n4)
                        keyboard.press(Key.enter)
                        keyboard.release(Key.enter)
                        time.sleep(2)
                        for _ in range(10):
                            keyboard.press(Key.tab)
                            keyboard.release(Key.tab)
                            time.sleep(0.5)
                        keyboard.press(Key.enter)
                        keyboard.release(Key.enter)
                        bkchd=-5
                elif(w10>=0):
            
                    webbrowser.open("www.khatrimaza.link")
                    bkchd=-5
                else:
                    y=1
                    webbrowser.open("www.youtube.com")
                    time.sleep(5)
                    keyboard.press(Key.tab)
                    keyboard.release(Key.tab)
                    keyboard.press(Key.tab)
                    keyboard.release(Key.tab)
                    keyboard.press(Key.tab)
                    keyboard.release(Key.tab)
                    keyboard.type(n4)
                    keyboard.press(Key.enter)
                    keyboard.release(Key.enter)
                    time.sleep(2)
                    for _ in range(10):
                        keyboard.press(Key.tab)
                        keyboard.release(Key.tab)
                        time.sleep(0.5)
                    keyboard.press(Key.enter)
                    keyboard.release(Key.enter)
                    bkchd=-5

            
            if(w7>=0):
                os.startfile("suresir.mp3")
                time.sleep(1)
                i=random.randint(0,1)
                if (i==0):
                    joke1()
                elif (i==1):
                    joke2()
                bkchd=-5

            
            if(w6>=0):
                os.startfile("suresir.mp3")
                print("Bye...")
                time.sleep(2)
                subprocess.call(["shutdown", "-f", "-s", "-t", "60"])
                bkchd=-5
            if(w3>=0):
                os.startfile("suresir.mp3")
                time.sleep(2)
                fname1=task.replace("open","")
                fname2=fname1.replace("game","")
                fname3=fname2.replace("folder","")
                fname4=fname3.replace("file","")
                fname41=fname4.replace("calculator","calc")
                fname5=fname41.replace("app","")
        
                keyboard.press(Key.cmd)
                keyboard.release(Key.cmd)
                time.sleep(0.05)
                keyboard.type(fname5)
                time.sleep(0.15)
                keyboard.press(Key.enter)
                keyboard.release(Key.enter)
                bkchd=-5


            if(w1<0 and w2<0 and w3<0 and w4<0 and w5<0 and w6<0 and w7<0 and w8<0 and w9<0):
                os.startfile("suresir.mp3")
                a=0
                i=0
                ntask=(task)
                n1=ntask.replace("on YouTube","")
                n1=n1.replace("on gaana","")
                n2=n1.replace("play","")
                n3=n2.replace("video","")
                n0=n3.replace("song","")
                # strip any digits and timing words from the phrase
                n5 = ''.join(ch for ch in n0 if not ch.isdigit())
                n4 = n5.replace("and stop listening for", "")
                n4 = n4.replace("minutes", "")
                n4 = n4.replace("minute", "")
                n4 = n4.replace("seconds", "")
                n4 = n4.replace("second", "")
                for i in range(0, 6):
                    prob = task.find(question[i]) + 1
                    a = a + prob
                if(a==0):
                    
                    try:
                        detail= (wikipedia.summary(task,sentences=2))
                        tts= gTTS(text=detail,lang="en")
                        tts.save("a.mp3")
                        os.startfile("a.mp3")
                        print(detail)
                        time.sleep(15)
                        bkchd=-5
                    except:
                        print ("I can search it for you")
                        time.sleep(1)
                        webbrowser.open(website)
                        bkchd=-5
                
                else:
                    print ("I can search it for you")
                    time.sleep(1)
                    webbrowser.open(website)
                    bkchd=-5

        
                
            if(w5>=0):
                f1=task.find("minute")
                f2=task.find("minutes")
                # strip all letters and spaces, keeping only the digits spoken
                a27 = ''.join(ch for ch in task if not (ch.isalpha() or ch == ' '))
                print(a27)
                
                if(f1>=0 or f2>=0):
                    a0=int(a27)
                    a4=a0*60
                else:
                    a4=int(a27)
                os.startfile("suresir.mp3")
                time.sleep(a4)
                bck=gTTS(text="Back again. sir",lang="en")
                bck.save("backagain.mp3")
                os.startfile("backagain.mp3")
                time.sleep(1.5)
Example #18
try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")

# to search
query = "Geeksforgeeks"

for j in search(
        query,
        tld="co.in",
        num=10,
        stop=10,
        pause=2,
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                   "(KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
):
    print(j)
Example #19
# import urllib.request
# import json
# x = urllib.request.urlopen('http://testpy.pickandpaid.com/test/')
# data = x.read()
# JSON_object = json.loads(data.decode('utf-8'))
# # print(JSON_object)

# for data in JSON_object:
#     print(JSON_object[data])
def digital_assistant(data):
    try:
        if "how are you" in data: 
            respond("I am well")
            return
            
        elif "time" in data:      
            respond(ctime())
            return

        elif "who are you" in data or "what can you do" in data or "define yourself" in data:
            respond("I am viswanadh's personal assistant, I am programmed to do minor tasks like system monitoring, profiling,"
            "predict time, take a photo, predict weather,"
            " opening applications like youtube, google chrome ,gmail etcetre, show the top headline news and you can ask me computational or geographical questions too!")
            return

        elif "who made you" in data or "who created you" in data:
            respond("I was built by viswa")
            return

        elif "shutdown" in data:
            respond("Are you sure! you want to shutdown your computer")
            data = listen()
            if data == "yes":
                respond("system is going to shutdown...")
                os.system("taskkill /f /im Rainmeter.exe")
                os.system("shutdown /s /t 1")
                return
        
        elif "restart" in data:
            respond("want to restart your computer")
            data=listen()
            if data=="yes":
                os.system("shutdown /r /t 1")
                return
        
        elif "battery" in data:
            battery=psutil.sensors_battery()
            respond("Your system is at " + str(battery.percent) + " percent")
            return

        elif "cpu" in data:
            respond("CPU is at "+ str(psutil.cpu_percent()))
            return

        elif "music" in data:
            respond("Here you go with music")
            music_dir = "C:\\Users\\VISWANADH\\Music"
            song = random.choice(os.listdir(music_dir))
            os.startfile(os.path.join(music_dir,song))
            time.sleep(5)
            return

        elif "movie" in data:
            os.system("D:\\movies\\Ala_Vaikunthapurramloo.mkv")
            time.sleep(5)
            return
        
        elif "notepad" in data:
            os.system("notepad")
            return

        elif "open" in data:
            data = data.split(" ")
            query = data[1]
            for j in search(query, tld='com', lang='en', num=1, start=0, stop=1, pause=2.0):
                url=j
            webbrowser.get('chrome').open_new(url)
            respond(data[1] + " is open now")
            time.sleep(7)
            return

        elif "news" in data:
            query = "news"
            url="https://timesofindia.indiatimes.com/home/headlines"
            webbrowser.get('chrome').open_new(url)
            respond("Here are some headlines from the Times of India,Happy reading")
            time.sleep(5)
            return
                
        elif "weather" in data:
            data=data.split(" ")
            # create your key at https://home.openweathermap.org/users/sign_in
            api_key = "####################"  # masked API key
            base_url = "https://api.openweathermap.org/data/2.5/weather?"
            if "in" not in data:
                city_name = "kurupam"
            else:
                city_name = data[-1]
            complete_url = base_url + "appid=" + api_key + "&q=" + city_name
            response = requests.get(complete_url)
            x = response.json()
            if x["cod"] != "404":
                y = x["main"]
                current_temperature = y["temp"]
                current_humidiy = y["humidity"]
                z = x["weather"]
                weather_description = z[0]["description"]
                respond(" Temperature in kelvin unit at " + city_name + " is " +
                      str(current_temperature) +
                      "\n humidity in percentage is " +
                      str(current_humidiy) +
                      "\n description  " +
                      str(weather_description))
                return
            else:
                respond(city_name + " weather details not found")
                return
        
        elif "something" in data:
            respond("Searching...")
            data=data[22:]
            data =  "According to wikipedia " + wikipedia.summary(data, sentences=4) 
            respond(data)
            return

        elif "capture the photo" in data or "take a photo" in data:
            ec.capture(0,False,"img.jpg")
            respond("photo captured successfully")
            return

        elif "video" in data or "capture the video" in data:
            ec.auto_vidcapture(0,False,"video.mkv",10)
            respond("video recorded successfully")
            return

        elif "access" in data:
            access()
            return
        
        elif "where is" in data:
            data = data.split(" ")
            name = data[-1]
            url = "https://www.google.com/maps/place/"+name
            webbrowser.get('chrome').open_new(url)
            time.sleep(5)
            return

        elif "write a note" in data:
            respond("What should i write, sir!")
            data = listen()
            file = open('note.txt', 'w')
            file.write(data)
            respond("noted successfully")
            return
        
        elif "execute" in data:
            execute_commands()
            return

        elif "upcoming events" in data or "scheduled events" in data or "events" in data:
            events = calendar_events()
            return

        elif "game" in data or "play" in data:
            try:
                tic_tac_toe()
                return
            except:
                return

        elif "create event" in data:
            create_event()
            return
            
        elif "speed test" in data:
            try:
                respond("sure! wait a second to measure")
                st = speedtest.Speedtest()
                server_names = []
                st.get_servers(server_names)
                ping = st.results.ping
                downlink_Mbps = round(st.download() / 1000000, 2)
                uplink_Mbps = round(st.upload() / 1000000, 2)
                respond('ping {} ms'.format(ping))
                respond("The uplink is {} Mbps".format(uplink_Mbps))
                respond("The downlink is {}Mbps".format(downlink_Mbps))
                return
            except:
                respond ("I couldn't run a speedtest")     
                return              
        
        elif "memory" in data:
            process_id = os.getpid()
            py = psutil.Process(process_id)
            memory_use = round(py.memory_info()[0]/2. **30, 2)
            respond("I use {} Gb of memory".format(memory_use))
            return
        
        elif "internet connection" in data or "internet" in data:
            if internet_availability():
                respond("Internet Connection is okay!")
            return
        elif 'email to' in data:
            try:
                respond("Sir, give me your message")
                print('Give message.......')
                content = takeCommand()
                to = "receiver email address"
                sendEmail(to, content)
                print('Sending mail........')
                respond("Email has been sent!")
            except Exception as e:
                print(e)
                respond("Sorry master . I am not able to send this email")
Example #20
#from extract import extract_news_from_page, get_full_html_from_news, get_title, content_sportv, extract_text_from_news_link
from googlesearch import search

query = "globoesporte.globo.com/futebol/ /noticia/2016"
result = search(query,
                tld='com',
                lang='pt-br',
                num=10,
                start=0,
                stop=50,
                pause=2.0)

news_links = []
for r in result:
    if "/2016/" in r:
        news_links.append(r)

print(news_links)
Example #21
    wordcount = {}
    for word in a.lower().split():
        # drop punctuation anywhere in the word
        word = ''.join(ch for ch in word if ch not in '.,:"!?')
        if word not in stopwords:
            if word not in wordcount:
                wordcount[word] = 1
            else:
                wordcount[word] += 1
    # Print most common word
    n_print = int(input("How many key words to google?: "))

    word_counter = collections.Counter(wordcount)
    googleString = ""
    for word, count in word_counter.most_common(n_print):
        googleString = googleString + " " + word

    try:
        from googlesearch import search
    except ImportError:
        print("No module named 'google' found")
    for j in search(googleString, tld="co.in", num=10, stop=5, pause=2):
        print(j)

    file.close()
Example #22
def main():
    running = True
    while running:
        #query = "U.S. president after Lincoln, almost impeached by the Radical Republicans quizlet"
        query = input("Whats the question? (Type 'q' to quit)\n\n")
        if query == "q":
            running = False
            break
        # Get all the urls
        urls = []
        #os.environ['HTTP_PROXY'] = 'http://172.16.0.3:8888'
        for j in search(query,
                        tld="co.in",
                        num=numSites,
                        stop=numSites,
                        pause=2):
            if ("quizlet" in j):
                n = findnth(j, "/", 3)
                jfinal = j[0:n + 1]
                urls.append(jfinal)
                #print(jfinal)

        # Check data from quizlets to see if theres relevant stuff
        pp = pprint.PrettyPrinter(indent=4)
        found = False
        for url in urls:
            data = get_data_from(url)  # Just doing the first one for now
            for pair in data:
                t = pair["term"]
                d = pair["definition"]
                keyword_prune = prune(query.lower())
                matchedT = re.findall(keyword_prune, t)
                matchedD = re.findall(keyword_prune, d)
                if len(matchedT) != 0 or len(matchedD) != 0:
                    found = True
                    print(
                        "\n\n -------------------------Found! (with prune)-------------------------\n\n"
                    )
                    pp.pprint(pair)
                    print(
                        "\n\n ---------------------------------------------------------------------\n\n"
                    )

            # Now do the exact same thing with lnsw method
            if not found:
                for pair in data:
                    t = pair["term"]
                    d = pair["definition"]
                    keyword_lnsw = LNSW(query)
                    if keyword_lnsw is None:
                        print("None keyword_lnsw")
                        exit()
                    matchedT = re.findall(keyword_lnsw, t)
                    matchedD = re.findall(keyword_lnsw, d)
                    if len(matchedT) != 0 or len(matchedD) != 0:
                        found = True
                        print(
                            "\n\n -------------------------Found! (with LNSW:",
                            keyword_lnsw, ")-------------------------\n\n")
                        pp.pprint(pair)
                        print(
                            "\n\n -------------------------------------------------------------\n\n"
                        )

                #print(matches)
            #pp.pprint(data)
        if not found: print("\nNothing found.\n")
Example #23
import speech_recognition as sr
import time
import webbrowser
from googlesearch import search
f=open('url.txt','w')
print("press 1 for keyboard and Press 2 for voice search")
choice=input()
if choice == '1':
    web = input("Please enter here : ")
    print(web)
    webbrowser.open_new_tab("https://www.google.com/search?q=" + web)
    url = []
    for i in search(web, stop=10):
        print(i)
        time.sleep(3)
        f.write(i + '\n')
        url.append(i)
    print(url)
    f.close()
elif choice == '2':
    def recognize_speech_from_mic(recognizer, microphone):
        with microphone as source:
            audio = recognizer.listen(source)
        response = {
            "success": True,
            "error": None,
            "transcription": None,
        }
        try:
            response["transcription"] = recognizer.recognize_google(audio)
        except sr.RequestError:
Example #24
def google_query(query):
    link = []
    for j in search(query, tld="ca", num=10, stop=10, pause=2):
        link.append(j)
    return link
Example #25
        elif latest < requested:
            print("That comic doesn't exist yet silly!")
        else:
            url = (f'{url}{comicNumber}')
            webbrowser.open_new(url)
    except:
        print("An error occured. Perhaps you didn't enter an integer, or you don't have a recognized web browser, or dark magic interfered. The world may never know.")
        webbrowser.open_new("https://xkcd.com/2200/")
elif searchType == "phrase":
    pattern = r"^https?://xkcd.com/\d+/$" # Matches url for an xkcd, with or without https
    referencePhrase = input("What is the relevant phrase?")
    num_results = input("How many xkcds would you like to search for?")
    query = ("site:xkcd.com " + str(referencePhrase))
    try:
        i = int(num_results)
        for result in search(query, num=20):
            if i <= 0:
                break
            if re.match(pattern, result) is not None: # Result matches pattern
                i -= 1
                webbrowser.open_new(result)
    except:
        print("An error occured. Perhaps you didn't enter an integer for the number of searches, or you don't have google, or maybe its all my fault and I really am a disappointment like my parents said.")
        webbrowser.open_new("https://xkcd.com/2200/")
elif searchType == "random":
    newest_comic = asyncio.run(latest_comic_num())
    comic_num = random.randint(1, newest_comic)
    random_url = (f"{url}{comic_num}")
    webbrowser.open_new(random_url)
else:
    webbrowser.open_new("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
Example #26
from googlesearch import search
file = open('search.txt', 'r')
count = 0
while True:
    count += 1
    line = file.readline()
    if not line:
        break
    linee = line.strip()
    print(linee)
    # you can change the domain by changing the tld value (co.in here)
    for url in search(linee, tld="co.in", num=10, pause=10):
        print(url)
file.close()
Example #27
                # cannot scrape the title of a web page containing a pdf file
                continue
            # sometimes google returns a numeric IP instead of the arxiv.org host.
            url = 'https://arxiv.org/' + '/'.join(
                url.replace('https://', '').split('/')[-2:])
            try:
                url_title = google_scrape(url)
            except Exception:
                url_title = ''
            print('url ', url)
            print('url title ', url_title)
            urls.append({
                'url_title': url_title,
                'url': url,
                'query': paper_title,
            })
    if output_json_dir and os.path.isdir(output_json_dir):
        with open(os.path.join(output_json_dir, 'query_urls.json'),
                  'a') as f_out:
            json.dump(urls, f_out, indent=4, sort_keys=True)
    return urls


if __name__ == '__main__':
    query = 'Response object above tell'
    for url in search(query, stop=3):
        a = google_scrape(url)
        print("url", url)
        print("Title: " + a)
        print(" ")
Example #28
import webbrowser

try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")

# to search
query = "whatsapp"

for j in search(query, tld="co.in", num=10, stop=1, pause=2):
    webbrowser.open(j)
Example #29
def extractFraseGoogle(frase, cantRes):
    print("Procurando emails...Aguarde")
    print("Esta operacao pode demorar varios minutos")
    try:
        listUrl = []
        listEmails = []

        for url in search(frase, stop=cantRes):
            listUrl.append(url)

        for i in listUrl:
            try:
                req = urllib.request.Request(i,
                                             data=None,
                                             headers={'User-Agent': ua.random})
                try:
                    conn = urllib.request.urlopen(req)
                except timeout:
                    print("Bad Url..")
                    time.sleep(2)
                    continue
                except (HTTPError, URLError):
                    print("Bad Url..")
                    time.sleep(2)
                    continue

                status = conn.getcode()
                contentType = conn.info().get_content_type()

                if (status != 200 or contentType == "audio/mpeg"):
                    print("Bad Url..")
                    time.sleep(2)
                    continue

                html = conn.read()

                soup = BeautifulSoup(html, "lxml")
                links = soup.find_all('a')

                print("Serao analizados " + str(len(links) + 1) + " Urls...")
                time.sleep(2)

                for tag in links:
                    link = tag.get('href', None)
                    if link is not None:
                        # Fix TimeOut
                        searchSpecificLink(link, listEmails, frase)

            except urllib.error.URLError as e:
                print("Problems with the url:" + i)
                print(e)
                pass
            except (http.client.IncompleteRead) as e:
                print(e)
                pass
            except Exception as e:
                print(e)
                pass

        print("")
        print("*******")
        print("Finish")
        print("*******")
        input("Aperte return  para continuar")
        menu()

    except KeyboardInterrupt:
        input("Aperte return  para continuar")
        menu()

    except Exception as e:
        print(e)
        input("Aperte enter para continuar")
        menu()
Example #30
			speak("Here you go to Google")
			print("* J: Here you go to Google \n")
			webbrowser.open("google.com")	
		elif 'open stackoverflow' in query:
			speak("Here you go to Stack Over flow.Happy coding")
			print("* J: Here you go to stackoverflow ")
			webbrowser.open("stackoverflow.com") 
		elif "wikipedia" in query:
			print("* J: Here you go to wikipedia")
			webbrowser.open("wikipedia.com")
		elif 'search for' in query:
			x=re.search('search for',query)
			speak("Finding results")
			print("J: Finding results\n")
			query=query[x.start()+10:]
			for j in search(query,tld="co.in",num=1, stop=1):
				print(j)
				webbrowser.open(j)
		elif 'news' in query:
			speak('here are some top news from the times of india')
			print('J: Here are some top news from the times of india')
			webbrowser.open("https://timesofindia.indiatimes.com/india") 
			
# STORY
		elif 'tell me a story' in query:
			speak("Reading a story book")
			content=takeCommand()
			speakstory()
# PLAY MOVIE
		elif 'play movie' in query:
			speak("Playing your playlist sir")
Example #31
        i += 1
    return i


def vuln_test(url):
    try:
        response = urllib2.urlopen(url)
        soup = BeautifulSoup(response.read(), 'html.parser')
        url2 = url + "'"
        response2 = urllib2.urlopen(url2)
        soup2 = BeautifulSoup(response2.read(), 'html.parser')
        if soup != soup2:
            if word_err(soup2) > -1:
                print "\n[%s*%s]Possible SQL vulnerability =>  %s " % (green, norm, url2)
    except urllib2.HTTPError:
        pass
    except urllib2.URLError:
        pass


credit()
dork = raw_input("Enter a dork (e.g. 'inurl:index.php?id=')  => ")
try:
    url_list = search(dork)
    for url in url_list:
        vuln_test(url)
except urllib2.HTTPError:
    print "%sGoogle banned your IP%s (restart connection)  " % (red, norm)


Example #32
def generate_data_set(url):

    data_set = []

    # Converts the given URL into standard format
    if not re.match(r"^https?", url):
        url = "http://" + url

    # Stores the response of the given URL
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
    except:
        response = ""
        soup = -999

    # Extracts domain from the given URL
    domain = re.findall(r"://([^/]+)/?", url)[0]
    if re.match(r"^www.", domain):
        domain = domain.replace("www.", "")

    # Requests all the information about the domain
    whois_response = whois.whois(domain)

    rank_checker_response = requests.post(
        "https://www.checkpagerank.net/index.php", {"name": domain})

    # Extracts global rank of the website
    try:
        global_rank = int(
            re.findall(r"Global Rank: ([0-9]+)",
                       rank_checker_response.text)[0])
    except:
        global_rank = -1

    # 1.having_IP_Address
    try:
        # the full URL never parses as an IP; check the host part instead
        ipaddress.ip_address(domain.split(":")[0])
        data_set.append(-1)
    except ValueError:
        data_set.append(1)

    # 2.URL_Length
    if len(url) < 54:
        data_set.append(1)
    elif len(url) >= 54 and len(url) <= 75:
        data_set.append(0)
    else:
        data_set.append(-1)

    # 3.Shortining_Service
    match = re.search(
        'bit\.ly|goo\.gl|shorte\.st|go2l\.ink|x\.co|ow\.ly|t\.co|tinyurl|tr\.im|is\.gd|cli\.gs|'
        'yfrog\.com|migre\.me|ff\.im|tiny\.cc|url4\.eu|twit\.ac|su\.pr|twurl\.nl|snipurl\.com|'
        'short\.to|BudURL\.com|ping\.fm|post\.ly|Just\.as|bkite\.com|snipr\.com|fic\.kr|loopt\.us|'
        'doiop\.com|short\.ie|kl\.am|wp\.me|rubyurl\.com|om\.ly|to\.ly|bit\.do|t\.co|lnkd\.in|'
        'db\.tt|qr\.ae|adf\.ly|goo\.gl|bitly\.com|cur\.lv|tinyurl\.com|ow\.ly|bit\.ly|ity\.im|'
        'q\.gs|is\.gd|po\.st|bc\.vc|twitthis\.com|u\.to|j\.mp|buzurl\.com|cutt\.us|u\.bb|yourls\.org|'
        'x\.co|prettylinkpro\.com|scrnch\.me|filoops\.info|vzturl\.com|qr\.net|1url\.com|tweez\.me|v\.gd|tr\.im|link\.zip\.net',
        url)
    if match:
        data_set.append(-1)
    else:
        data_set.append(1)

    # 4.having_At_Symbol
    if re.findall("@", url):
        data_set.append(-1)
    else:
        data_set.append(1)

    # 5.double_slash_redirecting
    positions = [x.start(0) for x in re.finditer('//', url)]  # don't shadow built-in `list`
    # the scheme's own "//" ends by index 7; a later "//" suggests an embedded redirect
    if positions[-1] > 6:
        data_set.append(-1)
    else:
        data_set.append(1)

    # 6.Prefix_Suffix
    if re.findall(r"https?://[^\-]+-[^\-]+/", url):
        data_set.append(-1)
    else:
        data_set.append(1)

    # 7.having_Sub_Domain
    if len(re.findall("\.", url)) == 1:
        data_set.append(1)
    elif len(re.findall("\.", url)) == 2:
        data_set.append(0)
    else:
        data_set.append(-1)

    # 8.SSLfinal_State
    try:
        if response.text:
            data_set.append(1)
        else:
            data_set.append(-1)
    except AttributeError:  # response is "" when the fetch failed
        data_set.append(-1)

    # 9.Domain_registeration_length
    expiration_date = whois_response.expiration_date
    registration_length = 0
    try:
        if isinstance(expiration_date, list):  # whois may return a single date or a list
            expiration_date = min(expiration_date)
        today = datetime.strptime(time.strftime('%Y-%m-%d'), '%Y-%m-%d')
        registration_length = abs((expiration_date - today).days)

        if registration_length / 365 <= 1:
            data_set.append(-1)
        else:
            data_set.append(1)
    except:
        data_set.append(-1)

    # 10.Favicon
    if soup == -999:
        data_set.append(-1)
    else:
        try:
            # only the first <link href=...> tag is examined
            for link in soup.find_all('link', href=True):
                dots = [x.start(0) for x in re.finditer(r'\.', link['href'])]
                # a favicon loaded from the same domain or a relative path is fine
                if url in link['href'] or len(dots) == 1 or domain in link['href']:
                    data_set.append(1)
                else:
                    data_set.append(-1)
                raise StopIteration
        except StopIteration:
            pass

    #11. port
    try:
        port = domain.split(":")[1]
        if port:
            data_set.append(-1)
        else:
            data_set.append(1)
    except:
        data_set.append(1)

    #12. HTTPS_token
    if re.findall(r"^https://", url):
        data_set.append(1)
    else:
        data_set.append(-1)

    #13. Request_URL
    i = 0
    success = 0
    if soup == -999:
        data_set.append(-1)
    else:
        for img in soup.find_all('img', src=True):
            dots = [x.start(0) for x in re.finditer('\.', img['src'])]
            if url in img['src'] or domain in img['src'] or len(dots) == 1:
                success = success + 1
            i = i + 1

        for audio in soup.find_all('audio', src=True):
            dots = [x.start(0) for x in re.finditer('\.', audio['src'])]
            if url in audio['src'] or domain in audio['src'] or len(dots) == 1:
                success = success + 1
            i = i + 1

        for embed in soup.find_all('embed', src=True):
            dots = [x.start(0) for x in re.finditer('\.', embed['src'])]
            if url in embed['src'] or domain in embed['src'] or len(dots) == 1:
                success = success + 1
            i = i + 1

        for iframe in soup.find_all('iframe', src=True):
            dots = [x.start(0) for x in re.finditer('\.', iframe['src'])]
            if url in iframe['src'] or domain in iframe['src'] or len(
                    dots) == 1:
                success = success + 1
            i = i + 1

        try:
            percentage = success / float(i) * 100
            if percentage < 22.0:
                data_set.append(1)
            elif ((percentage >= 22.0) and (percentage < 61.0)):
                data_set.append(0)
            else:
                data_set.append(-1)
        except:
            data_set.append(1)

    #14. URL_of_Anchor
    percentage = 0
    i = 0
    unsafe = 0
    if soup == -999:
        data_set.append(-1)
    else:
        for a in soup.find_all('a', href=True):
            # match any "javascript" href (e.g. "javascript:void(0)"); checking the
            # exact string would miss variants with different spacing or casing
            if "#" in a['href'] or "javascript" in a['href'].lower(
            ) or "mailto" in a['href'].lower() or not (url in a['href']
                                                       or domain in a['href']):
                unsafe = unsafe + 1
            i = i + 1

        try:
            percentage = unsafe / float(i) * 100
            if percentage < 31.0:
                data_set.append(1)
            elif percentage < 67.0:
                data_set.append(0)
            else:
                data_set.append(-1)
        except ZeroDivisionError:
            # no anchors on the page; the original appended twice on this path
            data_set.append(1)

    #15. Links_in_tags
    i = 0
    success = 0
    if soup == -999:
        data_set.append(-1)
    else:
        for link in soup.find_all('link', href=True):
            dots = [x.start(0) for x in re.finditer('\.', link['href'])]
            if url in link['href'] or domain in link['href'] or len(dots) == 1:
                success = success + 1
            i = i + 1

        for script in soup.find_all('script', src=True):
            dots = [x.start(0) for x in re.finditer('\.', script['src'])]
            if url in script['src'] or domain in script['src'] or len(
                    dots) == 1:
                success = success + 1
            i = i + 1
        try:
            percentage = success / float(i) * 100
            if percentage < 17.0:
                data_set.append(1)
            elif percentage < 81.0:
                data_set.append(0)
            else:
                data_set.append(-1)
        except ZeroDivisionError:
            # no <link>/<script> tags found; the original appended twice on this path
            data_set.append(1)

        #16. SFH
        for form in soup.find_all('form', action=True):
            if form['action'] == "" or form['action'] == "about:blank":
                data_set.append(-1)
            elif url not in form['action'] and domain not in form['action']:
                data_set.append(0)
            else:
                data_set.append(1)
            break  # only the first form is checked
        else:
            data_set.append(1)  # no forms on the page

    #17. Submitting_to_email
    if response == "":
        data_set.append(-1)
    else:
        if re.findall(r"[mail\(\)|mailto:?]", response.text):
            data_set.append(1)
        else:
            data_set.append(-1)

    #18. Abnormal_URL
    if response == "":
        data_set.append(-1)
    else:
        if response.text == "":
            data_set.append(1)
        else:
            data_set.append(-1)

    #19. Redirect
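    # requests records the chain of followed redirects in response.history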
    if response == "":
        data_set.append(-1)
    else:
        if len(response.history) <= 1:
            data_set.append(-1)
        elif len(response.history) <= 4:
            data_set.append(0)
        else:
            data_set.append(1)

    #20. on_mouseover
    if response == "":
        data_set.append(-1)
    else:
        if re.findall("<script>.+onmouseover.+</script>", response.text):
            data_set.append(1)
        else:
            data_set.append(-1)

    #21. RightClick
    if response == "":
        data_set.append(-1)
    else:
        if re.findall(r"event.button ?== ?2", response.text):
            data_set.append(1)
        else:
            data_set.append(-1)

    #22. popUpWidnow
    if response == "":
        data_set.append(-1)
    else:
        if re.findall(r"alert\(", response.text):
            data_set.append(1)
        else:
            data_set.append(-1)

    #23. Iframe
    if response == "":
        data_set.append(-1)
    else:
        if re.findall(r"[<iframe>|<frameBorder>]", response.text):
            data_set.append(1)
        else:
            data_set.append(-1)

    #24. age_of_domain
    if response == "":
        data_set.append(-1)
    else:
        try:
            registration_date = re.findall(
                r'Registration Date:</div><div class="df-value">([^<]+)</div>',
                whois_response.text)[0]
            if diff_month(date.today(), date_parse(registration_date)) >= 6:
                data_set.append(-1)
            else:
                data_set.append(1)
        except:
            data_set.append(1)

    #25. DNSRecord
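    # a failing WHOIS lookup here is taken to mean the domain has no DNS record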
    dns = 1
    try:
        d = whois.whois(domain)
    except:
        dns = -1
    if dns == -1:
        data_set.append(-1)
    else:
        if registration_length / 365 <= 1:
            data_set.append(-1)
        else:
            data_set.append(1)

    #26. web_traffic
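    # queries Alexa's XML rank endpoint (the Alexa ranking service has since been retired)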
    try:
        rank = BeautifulSoup(
            urllib.request.urlopen(
                "http://data.alexa.com/data?cli=10&dat=s&url=" + url).read(),
            "xml").find("REACH")['RANK']
        rank = int(rank)
        if (rank < 100000):
            data_set.append(1)
        else:
            data_set.append(0)
    except Exception:  # missing REACH tag or a failed request to the Alexa endpoint
        data_set.append(-1)

    #27. Page_Rank
    try:
        if global_rank > 0 and global_rank < 100000:
            data_set.append(-1)
        else:
            data_set.append(1)
    except:
        data_set.append(1)

    #28. Google_Index
    site = list(search(url, num=5, stop=5))  # materialize: a bare generator is always truthy
    if site:
        data_set.append(1)
    else:
        data_set.append(-1)

    #29. Links_pointing_to_page
    if response == "":
        data_set.append(-1)
    else:
        number_of_links = len(re.findall(r"<a href=", response.text))
        if number_of_links == 0:
            data_set.append(1)
        elif number_of_links <= 2:
            data_set.append(0)
        else:
            data_set.append(-1)

    #30. Statistical_report
    url_match = re.search(
        'at\.ua|usa\.cc|baltazarpresentes\.com\.br|pe\.hu|esy\.es|hol\.es|sweddy\.com|myjino\.ru|96\.lt|ow\.ly',
        url)
    try:
        ip_address = socket.gethostbyname(domain)
        ip_match = re.search(
            '146\.112\.61\.108|213\.174\.157\.151|121\.50\.168\.88|192\.185\.217\.116|78\.46\.211\.158|181\.174\.165\.13|46\.242\.145\.103|121\.50\.168\.40|83\.125\.22\.219|46\.242\.145\.98|'
            '107\.151\.148\.44|107\.151\.148\.107|64\.70\.19\.203|199\.184\.144\.27|107\.151\.148\.108|107\.151\.148\.109|119\.28\.52\.61|54\.83\.43\.69|52\.69\.166\.231|216\.58\.192\.225|'
            '118\.184\.25\.86|67\.208\.74\.71|23\.253\.126\.58|104\.239\.157\.210|175\.126\.123\.219|141\.8\.224\.221|10\.10\.10\.10|43\.229\.108\.32|103\.232\.215\.140|69\.172\.201\.153|'
            '216\.218\.185\.162|54\.225\.104\.146|103\.243\.24\.98|199\.59\.243\.120|31\.170\.160\.61|213\.19\.128\.77|62\.113\.226\.131|208\.100\.26\.234|195\.16\.127\.102|195\.16\.127\.157|'
            '34\.196\.13\.28|103\.224\.212\.222|172\.217\.4\.225|54\.72\.9\.51|192\.64\.147\.141|198\.200\.56\.183|23\.253\.164\.103|52\.48\.191\.26|52\.214\.197\.72|87\.98\.255\.18|209\.99\.17\.27|'
            '216\.38\.62\.18|104\.130\.124\.96|47\.89\.58\.141|78\.46\.211\.158|54\.86\.225\.156|54\.82\.156\.19|37\.157\.192\.102|204\.11\.56\.48|110\.34\.231\.42',
            ip_address)
        if url_match:
            data_set.append(-1)
        elif ip_match:
            data_set.append(-1)
        else:
            data_set.append(1)
    except:
        print('Connection problem. Please check your internet connection!')
        data_set.append(-1)  # keep the feature vector at full length

    print(data_set)
    return data_set
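
A minimal driver for the extractor above might look like the sketch below. The model file name and the use of a pre-trained scikit-learn classifier are assumptions for illustration; the original project's training code is not part of this example.

import joblib  # assumed: a pre-trained scikit-learn model saved with joblib

def classify(url, model_path="phishing_model.pkl"):  # hypothetical file name
    features = generate_data_set(url)
    model = joblib.load(model_path)
    # scikit-learn expects a 2-D array: one row, one column per feature
    return model.predict([features])[0]

# classify("http://example.com")  -> 1 (legitimate) or -1 (phishing)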
예제 #33
0
File: g.py  Project: git-vivek/First
#! /usr/bin/python3
import sys
from googlesearch import search

query = sys.argv[1]
for i in search(query, tld="co.in", num=5, start=0, stop=5):
    print(i)
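
Run it from a shell, for example:

    python3 g.py "python web scraping"

which prints the top five result URLs for the query.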

예제 #34
0
 byewishing = [
     'bye bye', 'Have a good day', 'I am happy to help you',
     'see you again later', 'see you next time!farewell'
 ]
 while True:
     # query=takecommand().lower()
     query = input()
     checkgreet = query.split(" ")
     if any(item in checkgreet for item in (grettings + wishes)):  # `or` checked grettings only
         greet = random.choice(tuple(grettings))
         speak(greet)
     elif 'search' in query:
         query = query.replace("search", "")
         if 'google' in query:
             query = query.replace('google', '')
             for link in search(query, num_results=5, lang="en-in"):
                 webbrowser.open_new_tab(link)
         elif 'ganna' in query:
             speak('Searching on gaana')
             query = query.replace('bye', '')
             query = query.replace('play', '')
             query = query.replace('song', '')
             query = query.replace('on', '')
             query = query.replace('in', '')
             query = query.replace('ganna', '')
             play_audio(query)
         elif 'youtube' in query or 'you tube' in query:
             speak('searching on youtube')
             query = query.replace("youtube", "")
             query = query.replace('on', '')
             query = query.replace('in', '')