Example #1
import re

import linkGrabber
import pandas as pd


def getprice(product):
    site = "mysmartprice"

    start = "'href': '/url?q="
    end = "&sa"
    links = linkGrabber.Links(
        'https://www.google.co.in/search?newwindow=1&biw=1366&bih=659&q=' +
        site + '+' + product)
    gb = links.find(limit=30)
    print(gb[25])
    gb1 = str(gb)
    frame = pd.DataFrame()
    gb2 = re.search("http://www.mysmartprice.com/(.+?)(%|&)", gb1)
    #print(gb2.group(1))
    if gb2:
        found2 = gb2.group(1)
        print(found2)
        # findprice() is defined elsewhere in the original project
        frame = findprice(found2)

    else:
        print('Not found')
    return frame


#getprice()
def get_links(url):
    try:
        links = linkGrabber.Links(url)
        l = links.find()
    except Exception:
        # return an empty list on failure so callers can still iterate
        l = []
    return l
Example #3
import re

import linkGrabber
import pandas as pd


def findgitlink(url, identifier):
    links = linkGrabber.Links(url)
    print('url: ' + url)
    gb = links.find()

    for item in gb:
        print(item['href'])
        if item['href'].startswith(identifier):
            return item['href']


def getlink(product):
    #pd.set_option('display.width',1000)
    pd.options.display.max_colwidth = 200
    site = "amazon.in"
    #product ="NEXUS 5"
    start = "'href': '/url?q="
    end = "&sa"
    frame = pd.DataFrame()
    links = linkGrabber.Links(
        'https://www.google.com/search?newwindow=1&biw=1366&bih=659&q=' +
        site + '+' + product + '+product+reviews')
    gb = links.find(limit=30)
    print("~~~~~~~~~~~~print g2[25]~~~~~~~~~~~~~~~ ")
    print(gb[25])
    gb1 = str(gb)
    print("~~~~~~~~~~~print g2 group(1)~~~~~~~~~~~~~")
    print(str(gb1))

    # note: the search query targets amazon.in, but this pattern matches
    # amazon.com links
    gb2 = re.search("https://www.amazon.com/(.+?)(%|&)", gb1)
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("LINK GRABBED IS: ")
    print(gb2)
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("print g2 group(1)")
    print(gb2.group(1))
    if gb2:
        '''found = gb2.group(1)
		print(found[-10:])
		found1=found+'xyz'
		#time.sleep(4)
		gb3 = re.search('/(.+?)xyz',found1)
		found2=gb3.group(1)
		print(found2)
		time.sleep(500)
		frame =scrap(found2)'''
        found = gb2.group(1)
        print(found[-10:])
        # scrap() is defined elsewhere in the original project
        frame = scrap(found[-10:])
    else:
        print('Not found')
    return frame


# note: this redefines getlink(); in the original these were separate snippets
def getlink(product):
    site = "snapdeal"
    start = "'href': '/url?q="
    end = "&sa"
    links = linkGrabber.Links(
        'https://www.google.co.in/search?newwindow=1&biw=1366&bih=659&q=' +
        site + '+' + product + '+reviews')
    gb = links.find(limit=30)
    gb1 = str(gb)
    print(gb1)
    frame = pd.DataFrame()
    # write the raw link dump to a file; print(f, gb1) only printed the
    # file object, so use print(..., file=f) instead
    with open("link2.txt", "w+") as f:
        print(gb1, file=f)
    gb2 = re.search("www." + site + ".com(.+?)&", gb1)

    if gb2:
        found = gb2.group(1)
        link = "https://www." + site + ".com" + found
        print(link)
        frame = scrap(link)
    else:
        print('not found')
    return frame


# note: redefines getlink() once more
def getlink(txt):
    site = "flipkart"
    product = txt
    start = "'href': '/url?q="
    end = "&sa"
    links = linkGrabber.Links(
        'https://www.google.co.in/search?newwindow=1&biw=1366&bih=659&q=' +
        site + '+' + product + '+product+details')
    gb = links.find(limit=100)
    print(gb[25])
    gb1 = str(gb)
    frame = pd.DataFrame()
    gb2 = re.search("http://www.flipkart.com(.+?)(%|&)", gb1)

    if gb2:
        found2 = gb2.group(1)
        print(found2)
        #time.sleep(500)
        frame = scrap(found2)

    else:
        print('Not found')
    return frame
Example #7
import re
import linkGrabber

links = linkGrabber.Links(
    'https://www.google.com/search?client=firefox-b-d&q=king')
gb = links.find(limit=4, duplicates=False, pretty=True)
print(gb)
Example #8
import linkGrabber

links = linkGrabber.Links(r"http://google.com")
gb = links.find(limit=4, duplicates=False, pretty=True)
print(gb)
Example #9
import re
import linkGrabber

links = linkGrabber.Links("https://devx.work")
#gb = links.find(limit=10, duplicates= False, pretty=True)
gb = links.find(pretty=True)

print(gb)
Example #10
import re
import linkGrabber

links = linkGrabber.Links("https://www.google.com/")
gb = links.find(limit=5, duplicates=False, pretty=True)
print(gb)
Example #11
import re
import linkGrabber
import pprint

links = linkGrabber.Links('http://www.xkcd.com/')
gb = links.find(limit=4, duplicates=False, pretty=True)
pprint.pprint(gb)

# I think I need some regular expressions under here, but I'm still trying to
# learn them... this works to get the URLs, though.
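
# For the regular-expression filtering mentioned above, linkGrabber accepts
# Beautiful Soup filters as keyword arguments, e.g. href with a compiled
# pattern (see also Example #12). A minimal sketch; the 'archive' pattern is
# an illustrative assumption, not part of the original snippet:
archive_links = links.find(href=re.compile('archive'),
                           duplicates=False, pretty=True)
pprint.pprint(archive_links)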
Example #12
            "www.google.com",
            "www.airbnb.com",
            # "www.hotels.com",
            # "www.amazon.com",
            "www.cars.com",
            "www.twitch.tv",
            "store.steampowered.com",
            "www.reuters.com",
            # "imgur.com",
            # "www.lowes.com",
            "www.cbssports.com",
            "www.nfl.com",
            # "www.expedia.com",
            "www.walmart.com",
            "www.wayfair.com",
            "bing.com",
            # "reddit.com",
        ]
        for u in initial_domains:
            print(u)
            links = linkGrabber.Links(f"https://{u}/")
            l = links.find(limit=100,
                           href=re.compile("//" + u),
                           duplicates=False)
            _c = lambda x: x if x.startswith("http") else "https:" + x
            paths.extend([_c(e["href"]) for e in l])
    d = DomDataset(DATA_DIR + "/dom-dataset", paths)
    if download:
        d.download()
    d.process()
Example #13
import re
import linkGrabber
import ssl

# TLS context created here but never actually used below
gcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
#url = "http://mhrd.gov.in"
url = "http://upfireservice.gov.in/"
links = linkGrabber.Links(url)
gb = links.find(duplicates=False)
count = 0
for rec in gb:
    url_text = rec['text']
    url_href = rec['href']
    if '.pdf' in url_href:
        count += 1
        if not url_href.startswith('http'):
            url_href = url + url_href

        #print(url_text)
        print(url_href)
print(count)
Example #14
import linkGrabber

links = linkGrabber.Links(
    'http://allrecipes.com/recipes/276/desserts/cakes/?page=3#2')
# a bare dict after keyword arguments is a syntax error; unpack the
# Beautiful Soup filter with ** instead
gb = links.find(limit=10,
                duplicates=False,
                pretty=True,
                **{'class': 'pinterest'})
print(gb)
Example #15
    g = Github()
    repo = g.get_repo("mdadams/jasper")

    commit = repo.get_commit(sha=git_link.rsplit('/', 1)[-1])
    parent_sha = commit.commit.parents[0].sha
    print("sha : " + parent_sha)

# double the backslashes so "\L" is not treated as an escape sequence
directory = "C:\\LabWork\\" + parent_sha

git_issue_download_link = ''

if git_issue_link is None or git_issue_link == '':
    print("The git issue link was not found; can't reproduce the exploit")

else:
    links = linkGrabber.Links(git_issue_link)
    gb = links.find()

    for item in gb:
        if item['href'].endswith('.zip'):
            git_issue_download_link = item['href']

if not os.path.exists(directory):
    os.makedirs(directory)

if len(os.listdir(directory)) == 0:
    repo = Repo.clone_from(url="https://github.com/mdadams/jasper.git",
                           to_path="C:\LabWork" + "\\" + parent_sha)
    print("Repository cloned")
    print("Checking out to the wanted commit")
    repo.git.checkout(parent_sha)
Example #16
import re
import linkGrabber

#                                                        Explanation
# In this example we grab all of the URLs/links inside a page. This is useful
# in web-surfing and crawling scenarios, and it does not require Selenium.
# We use the linkGrabber library, which is not installed with Python by
# default, so install it first by opening your cmd and running:
#----------------------------------------------------------------------------------------------------------------------------------------------
#
#                                                    pip install linkGrabber
#
#----------------------------------------------------------------------------------------------------------------------------------------------
# Parameters of find():
# * filters (dict): Beautiful Soup's filters as a dictionary
# * limit (int): limit the number of links, in sequential order
# * reverse (bool): reverses how the list of <a> tags is sorted
# * sort (function): accepts a function that decides which key to sort upon
#   within the List class
# (a second call after the basic example below sketches reverse and an href
# filter)

links = linkGrabber.Links('https://google.com')
gb = links.find(limit=8, duplicates=False, pretty=True)

print(gb)
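
# A second call sketching the remaining documented parameters: reverse plus an
# href filter (as used in Example #12). The 'maps' pattern is an illustrative
# assumption, not part of the original example:
filtered = links.find(limit=8,
                      reverse=True,
                      href=re.compile('maps'),
                      duplicates=False,
                      pretty=True)
print(filtered)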
Example #17
import re
import linkGrabber

links = linkGrabber.Links("https://www.pitchvision.com/#/")
gb = links.find(pretty=True)

print(gb)

Example #18
# A Python script to read all the links from a webpage and print them.
# (the original imported the Python 2-only urllib2, which was unused and
# breaks on Python 3, so it is dropped here)

import linkGrabber

links = linkGrabber.Links(
    "https://www.indeed.com/jobs?q=devops+engineer&sort=date")
# the keyword is lowercase "limit"; "Limit" would be passed through as an
# attribute filter and match nothing
gb = links.find(limit=20, pretty=True)

print(gb)