def get_currently_selected_genre(self):
    """
    Return the Url for the current genre

    If the current genre element with a "selected" css class is a
    subgenre, this will return the subgenre's parent genre.
    """
    selected = None
    genres = self.get_top_level_genre_tags()
    if genres:
        for tag in genres:
            # A top-level genre carries "selected" in its css classes.
            if "selected" in tag['class']:
                selected = Url(tag.get('href'), tag.string)
                break
        #No hits in genres means a subgenre is currently selected
        # (for/else: runs only when the loop finished without break).
        else:
            subgenre = self._get_currently_selected_subgenre_tag()
            if subgenre:
                # Walk up from the subgenre <a> to the enclosing <li>
                # that also contains the parent genre link.
                parent_li = subgenre.parent.parent.parent
                selected_tag = parent_li.find("a", class_="top-level-genre")
                if selected_tag:
                    selected = Url(selected_tag.get('href'), selected_tag.string)
    return selected
def _get(self, version, method, url_or_urls, **kwargs):
    """
    _get makes the actual call to api.embed.ly

    NOTE: Python 2 code (urllib.urlencode/quote, map returning a list).

    :param version: API version segment of the endpoint path.
    :param method: API method name (e.g. 'oembed'); also stored on the
        returned Url object(s).
    :param url_or_urls: a single url string or a list of up to 20 urls.
    :param kwargs: extra query parameters; 'key' overrides the client key.
    :return: a Url for a single input, or a list of Urls for a list input.
    :raises ValueError: on missing urls, >20 urls, or a missing API key.
    """
    if not url_or_urls:
        raise ValueError('%s requires a url or a list of urls given: %s' %
                         (method.title(), url_or_urls))

    #A flag we can use instead of calling isinstance all the time.
    multi = isinstance(url_or_urls, list)

    # Throw an error early for too many URLs
    if multi and len(url_or_urls) > 20:
        raise ValueError('Embedly accepts only 20 urls at a time. Url '
                         'Count:%s' % len(url_or_urls))

    query = ''

    key = kwargs.get('key', self.key)

    #make sure that a key was set on the client or passed in.
    if not key:
        raise ValueError('Requires a key. None given: %s' % (key))

    kwargs['key'] = key

    query += urllib.urlencode(kwargs)

    if multi:
        # NOTE(review): produces a trailing '&'; the API presumably
        # tolerates it — confirm before changing.
        query += '&urls=%s&' % ','.join(
            [urllib.quote(url) for url in url_or_urls])
    else:
        query += '&url=%s' % urllib.quote(url_or_urls)

    url = 'http://api.embed.ly/%s/%s?%s' % (version, method, query)

    http = httplib2.Http(timeout=self.timeout)

    headers = {'User-Agent': self.user_agent}

    resp, content = http.request(url, headers=headers)

    if resp['status'] == '200':
        data = json.loads(content)
        # Optionally attach the raw response body alongside parsed data.
        if kwargs.get('raw', False):
            data['raw'] = content
    else:
        # Non-200: synthesize an error payload instead of raising.
        data = {'type': 'error',
                'error': True,
                'error_code': int(resp['status'])}

    if multi:
        # Pair each input url with its result dict (py2 map zips the
        # two sequences positionally).
        return map(lambda url, data: Url(data, method, url),
                   url_or_urls, data)

    return Url(data, method, url_or_urls)
def test_get_urls_list(client):
    """The listing endpoint returns every url stored in redis."""
    first = Url(origin_url=fake.uri(), short_url='fake_url')
    second = Url(origin_url=fake.uri(), short_url='fake_url_2')
    for entry in (first, second):
        redis_cli.set(entry.redis_key, entry.json())

    resp = client.get('/v1/url')

    assert resp.status_code == 200
    listed = resp.json.get('urls')
    assert first in listed
    assert second in listed
def test_slug_shortening(self):
    """
    Tests that a slug is properly generated

    Each colliding url must receive a slug one character longer than
    the previous one.
    """
    u1 = Url(url='http://lab.tmp.br/%s/index.html' % MOCK_MARK)
    u1.save()
    u2 = Url(url='http://another.lab.tmp.br/%s/index.html' % MOCK_MARK)
    u2.save()
    u3 = Url(url='http://yetanother.lab.tmp.br/%s/index.html' % MOCK_MARK)
    u3.save()
    self.assertEqual(u1.slug, MOCK_MARK[:MIN_SLUG])
    self.assertEqual(u2.slug, MOCK_MARK[:MIN_SLUG + 1])
    # Bug fix: u3 was saved but its slug was never checked.
    self.assertEqual(u3.slug, MOCK_MARK[:MIN_SLUG + 2])
def test_slug_shortening_failure(self):
    """ When a slug cannot be generated, SlugCollision is raised """
    seed = Url(url='http://lab.tmp.br/%s/index.html' % MOCK_MARK)
    seed.save()
    slug = seed.slug
    # Save colliding urls until every slug length up to MAX_SLUG is taken.
    while len(slug) < MAX_SLUG:
        clash = Url(url='http://another.lab.tmp.br/%s/index%s.html'
                        % (MOCK_MARK, len(slug)))
        clash.save()
        slug = clash.slug
    # One more collision has no longer slug available.
    final = Url(url='http://last.lab.tmp.br/%s/index%s.html'
                    % (MOCK_MARK, len(slug)))
    self.assertRaises(SlugCollision, final.save)
def shorten():
    """Create (or reuse) a short hash for the url posted in the form."""
    url = request.form.get('url')
    if not url:
        return 'No URL provided', 400
    if not is_url(url):
        return 'The URL is invalid', 400

    url = normalize_url(url)
    record = Url.query.filter_by(forward_to=url).first()
    if record is None:
        # Keep drawing hashes until we land on an unused one.
        _hash = generate_hash()
        while Url.query.filter_by(hash=_hash).first():
            _hash = generate_hash()
        record = Url(hash=_hash, forward_to=url)
        db.session.add(record)
        db.session.commit()

    return {'hash': record.hash, 'visited_times': record.visited_times}, 200
def create_url():
    """Shorten a url posted as JSON, reusing an existing slug when present."""
    if not request.is_json:
        abort(422)

    redirect_url = request.json.get('url', '').strip()
    if not is_valid_url(redirect_url):
        abort(422)

    # Reuse the existing short link when this target was seen before.
    existing = Url.query.filter(Url.redirect == redirect_url).first()
    if existing:
        return jsonify({'shorter': existing.get_full_short()})

    next_id = db.session.execute(Sequence("urls_id_seq"))
    created = Url(id=next_id, redirect=redirect_url,
                  slug=to_emoji_slug(next_id))
    db.session.add(created)
    db.session.commit()
    return jsonify({'shorter': created.get_full_short()})
def main(request):
    """Django view (Python 2): show the shortener form on GET, create or
    look up a shortened url on POST, and forbid other methods.

    :param request: django HttpRequest
    :return: HttpResponse with the form/confirmation HTML, or 403.
    """
    host = request.META['HTTP_HOST']
    if request.method == "GET":
        output = ("<form action='/' method='POST'>\n" +
                  "Introduce your url:" +
                  "<input type='text' name='url'/></br>\n" +
                  "<input type='submit' value='Submit' " +
                  "/></form>\n<br>\n<br>" +
                  str(Url.objects.values_list()))
    elif request.method == "POST":
        # Bug fix: split on the FIRST '=' only — urls containing '='
        # (query strings) were previously truncated by split("=")[1].
        urlname = urllib.unquote(request.body.split("=", 1)[1])
        if (not urlname.startswith("http://")
                and not urlname.startswith("https://")):
            urlname = "http://" + urlname
        try:
            urlname = Url.objects.get(url=urlname).url
        except Url.DoesNotExist:
            new_entry = Url(url=urlname)
            new_entry.save()
        urlnum = Url.objects.get(url=urlname).id
        output = ("You introduced: " + str(urlname) + "</br>\n" +
                  "The abbreviation is: /" + str(urlnum) + "</br>\n" +
                  "<meta http-equiv='Refresh' content='2;" +
                  "url=http://" + host + "'>")
    else:
        return HttpResponseForbidden("Method not allowed")
    return HttpResponse(output)
def get_currently_selected_subgenre(self):
    """Return the Url of the currently selected subgenre"""
    tag = self._get_currently_selected_subgenre_tag()
    if tag:
        return Url(tag.get('href'), tag.string)
    return None
def test_mock_generation(self):
    """ Tests that a mock slug is properly generated """
    entry = Url(url='http://lab.tmp.br/%s/index.html' % MOCK_MARK)
    entry.save()
    self.assertEqual(entry.slug, MOCK_MARK[:MIN_SLUG])
def main(request, **kwargs):
    """Render the shortener form; on a valid POST also persist the url.

    All three original branches rendered the same template with a
    context that always contained the form — build the context once.
    """
    if request.method == "POST":
        form = UrlForm(request.POST)
        context = {"form": form}
        if form.is_valid():
            url = Url()
            url.original_url = form.cleaned_data['url']
            url.save()
            context["url"] = url
    else:
        context = {"form": UrlForm()}
    return render_to_response("main.html", context,
                              context_instance=RequestContext(request))
def make_it(original_url):
    """Persist a new shortened url and return it serialized as JSON.

    :param original_url: the long url to shorten
    :return: flask JSON response built from the stored object's dict
    """
    short_url = Url()
    # Idiom fix: setattr() with constant attribute names is just
    # obfuscated attribute assignment.
    short_url.original = original_url
    short_url.short_url = hashfunc()
    models.storage.new(short_url)
    models.storage.save()
    return jsonify(short_url.to_dict())
def index():
    """Create a short-url alias from a JSON body {alias?, origin}.

    POST with an explicit alias: 410 if the alias already exists.
    POST without an alias: generate a random unused 6-char alias.
    Returns 400 when origin is missing, 200 otherwise.
    """
    body = request.get_json()
    alias = body.get('alias')
    origin = body.get('origin')
    if request.method == 'POST' and alias is not None:
        exists = db.session.query(
            db.exists().where(Url.alias == alias)).scalar()
        if exists:
            return {'code': 410, 'message': 'Alias repeat'}, 410
    if request.method == 'POST' and alias is None:
        # Draw a random candidate; returns None (implicitly) on a
        # collision so the caller retries.
        def gen():
            chars = string.ascii_letters + string.digits
            length = 6
            # NOTE: this `alias` is local to gen(), it does not touch
            # the outer variable of the same name.
            alias = ''.join(choice(chars) for x in range(length))
            exists = db.session.query(
                db.exists().where(Url.alias == alias)).scalar()
            if not exists:
                return alias
        alias = gen()
        while alias is None:
            alias = gen()
    if request.method == 'POST' and alias is not None:
        if origin is not None:
            url = Url(alias=alias, origin=origin)
            db.session.add(url)
            db.session.commit()
        else:
            return {'code': 400, 'message': 'Validation Failed'}, 400
    # NOTE(review): non-POST requests fall through to 200 'ok.' without
    # doing anything — presumably the route is POST-only; confirm.
    return {'code': 200, 'message': 'ok.'}
def api_add_url():
    """Create a Url record for the current user from the request input.

    Form fields: url (required), frequent (optional, digits),
    top_num (optional, 1-999), summary (optional free text).
    Raises APIValueError on validation failure; returns the inserted Url.
    """
    #check_admin()
    i = ctx.request.input(url='', frequent='', top_num='', summary='')
    url = i.url.strip()
    frequent = i.frequent.strip()
    top_num = i.top_num.strip()
    summary = i.summary
    if not url:
        raise APIValueError('url', 'url cannot be empty.')
    if frequent and not _RE_FREQUENT.match(frequent):
        raise APIValueError('frequent', 'frequent MUST be num. or empty')
    if top_num and not _RE_TOP_NUM.match(top_num):
        raise APIValueError('top_num', 'top_num must be 1-999 or empty.')
    user = ctx.request.user
    # NOTE(review): the defaults below are ints while user-supplied
    # values remain strings — presumably the Url model/DB layer coerces;
    # confirm before relying on the field type.
    if frequent == '':
        frequent = 30
    if top_num == '':
        top_num = 1
    url = Url(user_id=user.id, url=url, frequent=frequent,
              top_num=top_num, summary=summary)
    url.insert()
    return url
def query_bitly(longUrl, user):
    """Shorten *longUrl* via the bit.ly (j.mp) API and store the mapping.

    Python 2 / Google App Engine code (urllib.quote, urlfetch, db.put).

    :param longUrl: the url to shorten (scheme added when missing)
    :param user: creator recorded on the stored Url entity
    :return: the short url string, or a human-readable error message.
    """
    l = urllib.quote(longUrl, '')
    # Prepend a scheme when neither the raw nor the unquoted form has one.
    if (longUrl[:7].lower() != 'http://' and
            urllib.unquote(longUrl)[:7].lower() != 'http://' and
            longUrl[:8].lower() != 'https://' and
            urllib.unquote(longUrl)[:8].lower() != 'https://'):
        l = urllib.quote('http://' + longUrl, '')
    result = urlfetch.fetch(JMP_URL + l)
    logging.debug('posted to bit.ly: %s' % l)
    if result.status_code != 200:
        return 'Sorry! Query failed.'
    j = json.JSONDecoder()
    data = j.decode(result.content)
    # bit.ly also embeds its own status_code inside the JSON payload.
    if data.get('status_code') == 403:
        logging.warning('RATE LIMIT EXCEEDED')
        return 'Sorry! Experiencing rate limits from bit.ly'
    if data.get('status_code') != 200:
        logging.error(result.content)
        return 'Sorry! bit.ly did not accept the query. Make sure that your message only contains a URL.'
    url = Url(longUrl=data.get('data').get('long_url'),
              shortUrl=data.get('data').get('url'),
              creator=user)
    url.put()
    return data.get('data').get('url')
def shorten():
    """Shorten the url passed as the `url` query parameter.

    Rate-limits by client ip/token, logs the requester, and answers in
    the requested format (html redirect, json, or plain text).

    Fixes vs. the original: a missing `url` parameter is now rejected
    with 400 instead of persisting a None entry; the local `format`
    no longer shadows the builtin; a leftover debug print was removed.
    """
    long_url = request.args.get("url")
    token = request.args.get("token")
    fmt = request.args.get("format", "simple")
    ip = request.headers.get("X-Forwarded-For")

    if rate_limit_exceeded(ip, token):
        if fmt == "html":
            return redirect_and_flash(
                render_template("rate_limit_exceeded.html"))
        else:
            abort(429)

    # Bug fix: reject a missing/empty url instead of saving Url(url=None).
    if not long_url:
        abort(400)

    url = Url(url=long_url)
    url.save()
    log_ip = Ip(ip=ip, token=token, time=datetime.now())
    log_ip.save()

    root_url = url_for("index", _external=True, _scheme="https")
    slug = short_url.encode_url(url.id)
    new_url = root_url + slug

    if fmt == "html":
        return redirect_and_flash(
            render_template("new_url.html", new_url=new_url))
    elif fmt == "json":
        return jsonify(url=new_url)
    return new_url
def create():
    """ Register the url """
    origin_url = request.values.get('origin_url', None)
    if not origin_url:
        raise ApiException('"origin_url" is required')

    MAX_TRY = 5
    for attempt in range(MAX_TRY):
        try:
            u = Url(short_url=Url.gen_short_url(), origin_url=origin_url)
        except ValidationError as e:
            raise ApiException(str(e))
        # nx=True makes this an atomic "claim if unused" in redis.
        claimed = redis_cli.set(u.redis_key, u.json(), nx=True,
                                ex=u.SHORT_URL_EXPIRE_SECONDS)
        if claimed:
            break
        if attempt + 1 == MAX_TRY:
            raise ApiException('Collision happened. Please try again.')
    return jsonify({'short_url': u.short_url})
async def test(url: UrlSchema):
    """Shorten a url, honoring an optional custom short code.

    :param url: request schema carrying longUrl and optional customCode
    :return: dict with message, shortUrl and longUrl
    :raises HTTPException: 400 when the custom code is taken,
        500 on any storage failure.
    """
    payload = dict(url)
    if payload["customCode"]:
        shortCode = payload["customCode"]
    else:
        shortCode = shortuuid.ShortUUID().random(length=8)

    # Bug fix: os.path.join is a filesystem helper and would emit
    # backslashes on Windows; posixpath.join always uses '/'.
    import posixpath
    shortUrl = posixpath.join(config("BASE_URL"), shortCode)

    urlExists = Url.objects(shortCode=shortCode)
    if len(urlExists) != 0:
        raise HTTPException(status_code=400,
                            detail="Short code is invalid, It has been used.")
    try:
        record = Url(longUrl=payload["longUrl"], shortCode=shortCode,
                     shortUrl=shortUrl)
        record.save()
        return {
            "message": "Successfully shortened URL.",
            "shortUrl": shortUrl,
            "longUrl": payload["longUrl"]
        }
    except Exception as e:
        print(e)
        raise HTTPException(status_code=500,
                            detail="An unknown error occurred.")
def crawler(url=None, depth=1):
    """Recursively crawl *url* (Python 2 / celery task).

    Stores unseen urls in the database, fetches the page, and enqueues
    same-netloc links with depth - 1 until depth goes negative.
    """
    print 'Start url:' + str(url)
    if depth < 0:
        return
    # Skip urls that were already crawled.
    if Url.query.filter(Url.url == url).count() != 0:
        return
    u = Url(url)
    db.session.add(u)
    db.session.commit()
    try:
        html = urllib2.urlopen(url).read().decode('utf8')
    except (ValueError, urllib2.HTTPError, UnicodeError):
        print 'ERROR: Can\'t get html from url'
        return
    print 'Parse links'
    bs = BeautifulSoup(html, 'html.parser')
    netloc = get_netloc(url)
    for link in bs.find_all('a', href=True):
        new_url = link['href']
        # Resolve relative links against the current page.
        if not new_url.startswith('http'):
            new_url = urlparse.urljoin(url, new_url)
        # Only follow links within the same site (celery .delay enqueue).
        if new_url.startswith(netloc):
            crawler.delay(new_url, depth=depth - 1)
    parse_html(url, bs)
def post(self):
    """Create a shortened url for the authenticated user.

    Returns 403 when the url was already shortened by this user or when
    a requested custom short path is taken; 201 with the marshalled
    record otherwise.
    """
    self.parser.add_argument('url', type=url_validator, required=True,
                             help='url is invalid')
    self.parser.add_argument('group', type=url_group, required=True,
                             help='invalid group id', dest='group_id')
    self.parser.add_argument('short_url', type=short_url,
                             help='short url must be at least 3 characters in length')
    args = self.parser.parse_args()

    url = Url.query.filter((Url.path == args['url']) &
                           (Url.user_id == g.user.id)).first()
    if url is not None:
        message = '{0} has been shortened'.format(args['url'])
        return {'message': message}, 403

    length = random.randint(4, 8)
    custom = False
    # Bug fix: an omitted `short_url` arrives as None (reqparse default),
    # not '' — the old `== ''` check sent None into the custom branch
    # and stored short_path=None. Treat any falsy value as "generate".
    if not args['short_url']:
        short_path = self.shorten(length, g.user.id)
    else:
        short_path = args['short_url']
        url = Url.query.filter((Url.short_path == short_path)).first()
        if url is not None:
            message = '{0} is not available'.format(short_path)
            return {'message': message}, 403
        custom = True

    url = Url(group_id=args['group_id'], user_id=g.user.id,
              path=args['url'], short_path=short_path, custom=custom)
    db.session.add(url)
    db.session.commit()
    return marshal(url, self.urlField, envelope='data'), 201
def extract_from_url(url):
    '''From history info, extract url, title and body of page,
    cleaned with BeautifulSoup.

    Returns (Url, body_str) on success; implicitly returns None when the
    page has no usable title, is non-200, or is not an http(s) url.
    '''
    req = requests.get(url, allow_redirects=True, timeout=10)
    req.encoding = 'utf-8'
    # Bug fix: `is not 200` compared object identity, not value.
    if req.status_code != 200:
        # Bug fix: logging.exception is only valid inside an except
        # block; use warning for a non-exceptional condition.
        logging.warning("Warning: %s has a status code of: %s"
                        " omitted from database.\n",
                        req.url, req.status_code)
    bs_obj = BeautifulSoup(req.text, "lxml")
    # Bug fix: `&` is a bitwise operator; use boolean `and`.
    if hasattr(bs_obj.title, 'string') and req.status_code == requests.codes.ok:
        if url.startswith('http'):
            title = bs_obj.title.string
            # Strip non-content elements before extracting text.
            checks = ['script', 'style', 'meta', '<!--']
            for chk in bs_obj.find_all(checks):
                chk.extract()
            body = bs_obj.get_text()
            # Collapse leading/trailing whitespace on every line.
            pattern = re.compile(r'(^[\s]+)|([\s]+$)', re.MULTILINE)
            body_str = re.sub(pattern, " ", body)
            snippet = body_str[:100].replace(',', '-')
            if title is None:
                title = u'Untitled'
            u = Url(url=url, title=title, snippet=snippet)
            # Bug fix: these were logging.exception calls with extra
            # positional args and no %-placeholders.
            logging.info("Processed %s ...", url)
            logging.info("%s %s", u.title, body_str)
            return u, body_str
def shortener(request):
    """Django view: list shortened urls on GET, shorten one on POST.

    :param request: django HttpRequest
    :return: HttpResponse with the form/dictionary or the new mapping.
    """
    if request.method == "GET":
        urlDb = Url.objects.all()
        urlDic = ""
        for url in urlDb:
            urlDic += "URL " + str(url.url) + " Shortened URL " + str(
                url.id) + "<br/>"
        resp = ("<body><html> <form id= shortUrl method= post> "
                "<fieldset><legend>URL shortener</legend><label> Url</label> "
                "<input id= campo1 name= Url type= text /></label> "
                "<input id= campo2 name= pressbutton type= submit value= Shorten URL/> "
                "</fieldset> </form> <p> URL Dictionary </p>"
                + urlDic + "</body></html>")
    elif request.method == "POST":
        url = request.body.split("=")
        url = url[1].split("&")
        url = url[0]
        try:
            # Bug fix: the original rebound `url` to the model instance
            # here, then re-queried Url.objects.get(url=<Url instance>)
            # and concatenated a model object into the HTML response.
            # Keep `url` as the plain string throughout.
            Url.objects.get(url=url)
        except Url.DoesNotExist:
            new = Url(url=url)
            new.save()
        urlId = str(Url.objects.get(url=url).id)
        resp = ("<html><body>URL " + url + " Shortened URL "
                "<a href= http://" + url + ">" + urlId + "</a> "
                "</body></html>")
    return HttpResponse(resp)
def barra(request):
    """List stored urls on GET; shorten a posted url on POST."""
    formul = '<br><form action="" method="POST" accept-charset="UTF-8">' + \
             'URL para acortar: <input type="text" name="url">' + \
             '<input type="submit" value="Acorta!"></form><hr>'
    srvHost = str(request.META["SERVER_NAME"])
    srvPort = str(request.META["SERVER_PORT"])

    if request.method == "GET":
        listing = "".join(formatUrlHtml(entry, srvHost, srvPort)
                          for entry in Url.objects.all())
        return HttpResponse(formul + listing)

    if request.method == "POST":
        longUrl = request.POST.get("url", "")
        if longUrl == "":
            salida = "Incorrect post or empty url"
        else:
            # Default to http:// when no scheme was given.
            if not (longUrl.startswith("http://")
                    or longUrl.startswith("https://")):
                longUrl = "http://" + longUrl
            try:
                entry = Url.objects.get(long_url=longUrl)
            except Url.DoesNotExist:
                entry = Url(long_url=longUrl)
                entry.save()
            salida = formatUrlHtml(entry, srvHost, srvPort)
        return HttpResponse(salida)

    return HttpResponseNotAllowed("Method not allowed in this server")
def test_get_origin_url(client):
    """Fetching a short url returns its original url."""
    entry = Url(origin_url=fake.uri(), short_url='fake_url')
    redis_cli.set(entry.redis_key, entry.json())

    resp = client.get('/v1/url/{}'.format(entry.short_url))

    assert resp.status_code == 200
    assert resp.json.get('origin_url') == entry.origin_url
def get_top_level_genre_urls(self):
    """Return a list of all top level genre urls on this page."""
    return [Url(tag.get('href'), tag.string)
            for tag in self.get_top_level_genre_tags()]
def report_url(request):
    """Persist a reported url; respond with plain SUCCESS/ERROR text.

    :param request: django HttpRequest (expects POST field 'url')
    :return: HttpResponse "SUCCESS" on save, "ERROR" otherwise.
    """
    if request.method == 'POST':
        # Bug fix: request.POST['url'] raised (HTTP 500) when the field
        # was absent; .get() lets us answer with the documented "ERROR".
        url = request.POST.get('url')
        if not url:
            return HttpResponse("ERROR")
        try:
            newUrl = Url(url=url)
            newUrl.save()
        except Exception:
            return HttpResponse("ERROR")
        return HttpResponse("SUCCESS")
    # Bug fix: non-POST requests previously returned None (HTTP 500).
    return HttpResponse("ERROR")
def _get_or_create_url(self, url):
    """Return the Url row for *url*, creating and committing it when absent.

    :param url: the url string to look up
    :return: the persisted Url instance
    """
    # Perf/idiom fix: first() is a single query instead of the original
    # count()-then-index pair; also stop rebinding the `url` parameter.
    record = Url.query.filter(Url.url == url).first()
    if record is None:
        record = Url(url)
        db.session.add(record)
        db.session.commit()
    return record
def run(self):
    """Crawl every known domain, seeding a default one on first run."""
    # Fixtures: bootstrap the collection when it is empty.
    if not Domain.objects.count():
        seed = Domain(scheme='http', netloc='sametmax.com')
        seed.urls.append(Url(path='/'))
        seed.save()

    for domain in Domain.objects:
        self._find_links(domain)
def get_currently_selected_letter(self):
    """Return the Url for the current letter"""
    letters = self.get_letter_tags()
    if not letters:
        return None
    # The shared letter list is the grandparent of any letter tag.
    container = letters[0].parent.parent
    tag = container.find("a", class_="selected")
    if tag:
        return Url(tag.get('href'), tag.string)
    return None
def shortenURL(actualUrl):
    """Return (created, shortURL) for *actualUrl*, persisting on first use."""
    shortURL = parseURL(actualUrl)
    try:
        Url.objects.get(shortURL=shortURL)
        created = False
    except Url.DoesNotExist:
        Url(actualUrl=actualUrl, shortURL=shortURL).save()
        created = True
    return created, shortURL