def createMention(url, res):
    # Expand shortened URLs before searching the social networks.
    if resolve(url) != None:
        furl = resolve(url)
    else:
        furl = url
    # The Delicious feed endpoint is keyed by the MD5 hash of the URL.
    url = hashlib.md5(furl).hexdigest()
    response = urllib2.urlopen("http://feeds.delicious.com/v2/json/url/" + url)
    delRes = json.loads(response.read())
    delRes = []  # The Delicious RSS feed for URLs is not working; the one for users is still active
    twitRes = twittApi.search(furl)
    for a in delRes:
        print a
        author = a["a"]
        s = SocialProfile.objects.filter(username=author)
        if len(s) == 0:
            user_url = "https://delicious.com/" + author
            s = SocialProfile.objects.create(username=author, social_network="Delicious", url=user_url)
            print "Created user " + str(s)
        else:
            s = s[0]
        Mention.objects.create(profile=s, resource=res)
        print "Created mention for Delicious"
    for r in twitRes:
        author = r.user.screen_name
        s = SocialProfile.objects.filter(username=author)
        if len(s) == 0:
            user_url = "https://twitter.com/" + author
            sn = SocialNetwork.objects.create(name="Twitter", url="http://twitter.com/")
            s = SocialProfile.objects.create(username=author, social_network=sn, url=user_url)
            print "Created user " + str(s)
        else:
            s = s[0]
        Mention.objects.create(profile=s, resource=res)
        print "Created mention for Twitter"
def createResource(url):
    # Expand shortened URLs before storing the resource.
    if resolve(url) != None:
        url = resolve(url)
    g = Goose()
    a = g.extract(url=url)
    if len(url) > 200:
        print "Long DuckDuckGo links do not work"
        return None
    else:
        r = Resource.objects.filter(url=url)
        if len(r) > 0:
            print "The resource already existed"
            r = r[0]
        else:
            if a.title == None or a.title == "":
                title = "notitle"
            else:
                title = a.title
            try:
                r = Resource.objects.create(title=title, url=url)
            except:
                print "something went wrong"
                print title
                print url
        print "Created the resource for " + url
        return r
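# The two functions above lean on Django models (Resource, SocialNetwork,
# SocialProfile, Mention) whose definitions are not part of these snippets.
# A minimal sketch of what they might look like, purely as an assumption to
# make the ORM calls readable: field names come from the calls above, field
# types and lengths are guesses. Note that createMention() passes the plain
# string "Delicious" as social_network in one branch and a SocialNetwork
# instance in the other, so the real field type may differ from this sketch.
from django.db import models

class Resource(models.Model):
    title = models.CharField(max_length=255)
    url = models.URLField(max_length=500)

class SocialNetwork(models.Model):
    name = models.CharField(max_length=50)
    url = models.URLField()

class SocialProfile(models.Model):
    username = models.CharField(max_length=100)
    social_network = models.ForeignKey(SocialNetwork, null=True, on_delete=models.CASCADE)
    url = models.URLField()

class Mention(models.Model):
    profile = models.ForeignKey(SocialProfile, on_delete=models.CASCADE)
    resource = models.ForeignKey(Resource, on_delete=models.CASCADE)

class ResourceContainer(models.Model):
    rss = models.URLField()
    url = models.URLField()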
def clean2(self, url):
    # Follow the chain of shortened URLs, at most five hops; resolve() returns
    # None once the URL no longer redirects, so furl ends up fully expanded.
    furl = url
    i = 0
    while i < 5:
        expanded = resolve(furl)
        if expanded is None:
            break
        furl = expanded
        i += 1
        print i
    return furl
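# A quick usage sketch for clean2() above. The method never touches self, so
# it can be exercised directly for a check; the example URL is illustrative.
from urlunshort import resolve  # already imported in the original module

if __name__ == "__main__":
    # A single resolve() follows only one hop; clean2() keeps following the
    # chain until the URL stops redirecting (or five hops are reached).
    print(resolve("http://bit.ly/dunMgV"))
    print(clean2(None, "http://bit.ly/dunMgV"))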
def get_expand(url, user, tag):
    tagl = []
    tagl.append(str(tag))
    relatedToTweet = []
    response = twittApi.user_timeline(screen_name=user, count=10)
    for tweet in response:
        ht = extract_hash_tags(tweet.text)
        intersect = list(set(tagl) & set(ht))
        if len(intersect) > 0:
            #relatedToTweet.append(tweet)
            ## check whether the text contains links
            ## for each link in the text
            links = extract_urls(tweet.text.encode('utf-8'))
            for link in links:
                link = resolve(link)
                if link != url:
                    print link
                    print "Date: " + str(tweet.created_at)
                    #call_command('add',URL=link)
                    feed = feedfinder.feed(link)
                    print feed
                    if feed:
                        rc = ResourceContainer.objects.get_or_create(rss=feed, url=link)
                        add_feed(feed)
def process_url(self, url):
    expanded_url = resolve(url)
    response = urllib2.urlopen(url)
    page = response.read()
    # Pull the numeric user id out of the page source.
    user_id = re.findall("\"id\":\"[0-9]+\"", page)
    user_id = "{" + user_id[0] + "}"
    user_id = json.loads(user_id)
    user_id = user_id["id"]
    print user_id
    api = FoursquareAPI()
    listids = api.user_friendship(user_id)
    api.user_tips(user_id)
    api.user_venueslikes(user_id)
    # Tell the coordinator that this id has been processed.
    server = socket.socket()
    server.connect((HOST, PORT))
    server.send(json.dumps({"command": "UPDATE_ID", "clientid": client_id, "idlist": user_id, "level": 0}))
    message = json.loads(server.recv(BUFFER))
    server.close()
    print listids
    api.user_basic_info(user_id)
    # Queue every friend id for later processing.
    for user_id in listids:
        server = socket.socket()
        server.connect((HOST, PORT))
        server.send(json.dumps({"command": "INSERT_ID", "clientid": client_id, "idlist": user_id}))
        server.close()
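# The coordination protocol above is only visible from the client side. A
# minimal sketch (Python 2, matching the client code) of a server that would
# accept these JSON commands -- purely an assumption about the other end;
# HOST, PORT and BUFFER here are placeholders mirroring the client constants.
import json
import socket

HOST, PORT, BUFFER = "localhost", 9999, 4096
queue, processed = [], set()

def serve_once():
    listener = socket.socket()
    listener.bind((HOST, PORT))
    listener.listen(5)
    while True:
        conn, _ = listener.accept()
        msg = json.loads(conn.recv(BUFFER))
        if msg["command"] == "INSERT_ID":
            queue.append(msg["idlist"])              # a friend id to crawl later
        elif msg["command"] == "UPDATE_ID":
            processed.add(msg["idlist"])             # mark this id as done
            conn.send(json.dumps({"status": "ok"}))  # the client waits for a reply here
        conn.close()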
def writeCells(ws, user_id, username, message, keyword, url, email):
    sheet_copy = ws
    row_number = ws.max_row + 1
    url = resolve(url)
    date_now = datetime.datetime.now()
    formated_time = date_now.strftime('%Y-%m-%d %H:%M')
    sheet_copy.cell(row=row_number, column=1, value=user_id)
    sheet_copy.cell(row=row_number, column=2, value=username.strip().encode('utf-8'))
    sheet_copy.cell(row=row_number, column=3, value=message.strip().encode('utf-8'))
    sheet_copy.cell(row=row_number, column=4, value=keyword)
    sheet_copy.cell(row=row_number, column=5, value=formated_time)
    sheet_copy.cell(row=row_number, column=6, value=url)
    sheet_copy.cell(row=row_number, column=7, value=email)
    return sheet_copy
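# A hedged usage sketch for writeCells() above, assuming openpyxl and
# urlunshort are installed; the header row and field values are made up.
import datetime
from openpyxl import Workbook
from urlunshort import resolve

if __name__ == "__main__":
    wb = Workbook()
    ws = wb.active
    ws.append(["user_id", "username", "message", "keyword", "time", "url", "email"])
    writeCells(ws, 42, u"example_user", u"check this out http://bit.ly/dunMgV",
               "phishing", "http://bit.ly/dunMgV", "user@example.com")
    wb.save("mentions.xlsx")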
def get_expand(url, user, tag, social_network):
    tagl = []
    tagl.append(str(tag))
    relatedToTweet = []
    if social_network == "Twitter":
        print "----------------------------"
        print "On Twitter for user " + user + " and tag " + str(tag) + ": "
        response = twittApi.user_timeline(screen_name=user, count=10)
        for tweet in response:
            ht = extract_hash_tags(tweet.text)
            intersect = list(set(tagl) & set(ht))
            if len(intersect) > 0:
                #relatedToTweet.append(tweet)
                ## check whether the text contains links
                ## for each link in the text
                links = extract_urls(tweet.text.encode('utf-8'))
                for link in links:
                    link = resolve(link)
                    if link != url:
                        print link
                        print "Date: " + str(tweet.created_at)
                        #call_command('add',URL=link)
                        feed = feedfinder.feed(link)
                        print feed
                        if feed:
                            rc = ResourceContainer.objects.get_or_create(rss=feed, url=link)
                            add_feed(feed)
        print "__________________________"
        print ""
    elif social_network == "delicious":
        print "----------------------------"
        print "On Delicious for user " + user + " and tag " + str(tag) + ": "
        url_to_call = "http://feeds.delicious.com/v2/json/" + str(user) + "/" + urllib2.quote(str(tag), '')
        response = urllib2.urlopen(url_to_call)
        response = json.loads(response.read())
        for res in response:
            if url != str(res["u"]):
                print str(res["u"])
                print "Date: " + res["dt"]
                call_command('add', URL=str(res["u"]))
                feed = feedfinder.feed(str(res["u"]))
                if feed:
                    rc = ResourceContainer.objects.get_or_create(rss=feed, url=str(res["u"]))
                    add_feed(feed)
        print "__________________________"
        print ""
    else:
        print "This link has nothing from Twitter or Delicious"
def url_expand(match):
    """Generate links with expanded URLs.

    Args:
        match (SRE_Match): Regular expression match object

    Returns:
        str: HTML formatted link for URL
    """
    url = match.group()
    if url not in URLS:
        if urlunshort.is_shortened(url):
            URLS[url] = GLib.markup_escape_text(urlunshort.resolve(url))
        else:
            URLS[url] = GLib.markup_escape_text(url)
    return '<a href="{}">{}</a>'.format(URLS[url], URLS[url])
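# url_expand() is written as a substitution callback: it caches each expansion
# in URLS and returns the markup for a single match. A minimal sketch of how
# it might be wired up with re.sub -- the URL pattern here is a deliberate
# simplification, not the one used by the original program:
import re

def linkify(text):
    """Replace every URL in `text` with an expanded, escaped HTML link."""
    return re.sub(r'https?://\S+', url_expand, text)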
def verifyUrl(url):
    # Use the expanded URL when the shortener resolves, otherwise the URL as given.
    resolved_url = resolve(url)
    if resolved_url is not None:
        url = resolved_url
    parts = urlsplit(url)
    hostname = parts.hostname
    valid = checkHostname(hostname)
    if valid:
        base_url = "{0.scheme}://{0.netloc}".format(parts)
        return base_url
    else:
        return None
def url_expand(match):
    """Generate links with expanded URLs

    # Test mocks
    >>> URLS["http://bit.ly/dunMgV"] = "terminal.png"
    >>> from mock import Mock
    >>> match = Mock()
    >>> match.group = Mock(return_value=URLS.keys()[0])
    >>> url_expand(match)
    '<a href="terminal.png">terminal.png</a>'

    :param SRE_Match match: Regular expression match object
    :rtype: ``str``
    :return: HTML formatted link for URL
    """
    url = match.group()
    if url not in URLS:
        if urlunshort.is_shortened(url):
            URLS[url] = glib.markup_escape_text(urlunshort.resolve(url))
        else:
            URLS[url] = glib.markup_escape_text(url)
    return '<a href="%s">%s</a>' % (URLS[url], URLS[url])
from urlunshort import resolve
import httplib
import urlparse
import csv

# list of shortlinks:
urls = ['http://zite.to/12Uq1nW']

# store file
outfile = 'longlinks.csv'
myfile = open(outfile, "wb")
w = csv.writer(myfile)

for url in urls:
    results = []
    results += [resolve(url)]
    w.writerow(results)

myfile.close()
def entrypoint_urlunshort():
    parser = optparse.OptionParser(usage="%prog url")
    options, args = parser.parse_args()
    if args:
        print(resolve(args[0]))
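# entrypoint_urlunshort() looks like a console-script entry point. A hedged
# sketch of how it might be exposed via setuptools -- the package name, module
# name and command name "urlunshort" are assumptions, not values taken from
# the original project's setup file:
from setuptools import setup

setup(
    name="urlunshort",
    version="0.0.0",  # placeholder
    py_modules=["urlunshort"],
    entry_points={
        "console_scripts": [
            # running `urlunshort http://bit.ly/dunMgV` would call entrypoint_urlunshort()
            "urlunshort = urlunshort:entrypoint_urlunshort",
        ],
    },
)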
}, "threatInfo": { "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING"], "platformTypes": ["ALL_PLATFORMS"], "threatEntryTypes": ["URL"], "threatEntries": [ { "url": url }, ] } } headers = {'content-type': 'application/json'} r = requests.post(req_url, data=json.dumps(payload), headers=headers) resp = json.loads(r.text) return resp for k in dic: print k k = resolve(k[:-2]) #print hum resp = google_url_shorten(k) #print resp if len(resp) != 0: malicious[k] = resp print malicious[k] with open("C:\Users\KC-L\Documents\maliciouscollection1.txt", "wb") as myFile: pickle.dump(malicious, myFile)
import json
import configparser
import pyphishtank
from pyphishtank import PhishTank
#import urllib.parse as ub
#from urllib.parse import resolve
from urlunshort import resolve

new_tweets = []
for line in open('data.json'):
    # read each line of the given JSON file and append the parsed
    # dictionary to the list of tweets
    new_tweets.append(json.loads(line))
# print(new_tweets)

urls = []
for tweet in new_tweets:
    if 'expanded_url' in tweet:
        urls.append(tweet['expanded_url'])
    else:
        pass

api = PhishTank()
for url in urls:
    # resolve() returns None when the URL is not a shortened link, so fall
    # back to the original URL in that case
    unshortenUrl = resolve(url) or url
    print(api.check(str(unshortenUrl)))
try:
    # Generate a random three-character suffix and probe the shortener domain.
    rand = ''.join(
        random.choice(string.ascii_lowercase + string.ascii_uppercase + string.digits)
        for _ in range(3))
    rand = '6n' + rand
    print rand
    url = domain + rand
    #print url
    r = requests.get(url)
    #print requests
    status = r.status_code
    print status
    if status != 404 and status != 403:
        new = resolve(url)
        collection_file = open("C:\Users\KC-L\Documents\\qgs_collection", "a")
        collection_file.write(url + '\n')
        if new:
            collection_file.write(new + '\n')
        print url
        collection_file.flush()
        print new
        resp = google_url_shorten(new)
        print resp
        if len(resp) != 0:
            # record the Safe Browsing matches for this short url
            malicious[url] = resp
            print malicious[url]
            with open("C:\Users\KC-L\Documents\maliciouscollection2.txt", "wb") as myFile:
                pickle.dump(malicious, myFile)
except:
    continue
time.sleep(1)