Esempio n. 1
0
    def get_currently_selected_genre(self):
        """
        Return the Url of the top-level genre that is currently selected.

        The top-level genre tags are scanned for one whose ``class``
        attribute contains "selected".  When none of them matches, a
        subgenre is taken to be selected and the "top-level-genre" anchor
        found under the element three levels above the subgenre tag is
        used instead.

        Returns None when there are no top-level genre tags, or when
        neither a selected genre nor a selected subgenre can be located.
        """
        top_level_tags = self.get_top_level_genre_tags()
        if not top_level_tags:
            return None

        for anchor in top_level_tags:
            if "selected" in anchor['class']:
                return Url(anchor.get('href'), anchor.string)

        # No hit among the top-level genres: fall back to the subgenre.
        subgenre_tag = self._get_currently_selected_subgenre_tag()
        if not subgenre_tag:
            return None

        enclosing_item = subgenre_tag.parent.parent.parent
        genre_anchor = enclosing_item.find("a", class_="top-level-genre")
        if not genre_anchor:
            return None
        return Url(genre_anchor.get('href'), genre_anchor.string)
Esempio n. 2
0
    def _get(self, version, method, url_or_urls, **kwargs):
        """
        Make the actual call to api.embed.ly and wrap the response in Url.

        version and method are interpolated into the request path
        (``http://api.embed.ly/<version>/<method>``).  url_or_urls is either
        a single url string or a list of at most 20 urls.  Remaining keyword
        arguments are sent as query parameters; ``key`` falls back to
        ``self.key`` when it is not passed in.

        Returns a single Url for a string input, or a list of Url objects
        (one per requested url) for a list input.  On a non-200 response
        every returned Url wraps an error dict holding the HTTP status.

        Raises ValueError when no url is given, when more than 20 urls are
        given, or when no key is available.
        """
        if not url_or_urls:
            raise ValueError('%s requires a url or a list of urls given: %s' %
                             (method.title(), url_or_urls))

        # A flag we can use instead of calling isinstance all the time.
        multi = isinstance(url_or_urls, list)

        # Throw an error early for too many URLs
        if multi and len(url_or_urls) > 20:
            raise ValueError('Embedly accepts only 20 urls at a time. Url '
                             'Count:%s' % len(url_or_urls))

        # Make sure that a key was set on the client or passed in.
        key = kwargs.get('key', self.key)
        if not key:
            raise ValueError('Requires a key. None given: %s' % (key))
        kwargs['key'] = key

        query = urllib.urlencode(kwargs)
        if multi:
            query += '&urls=%s&' % ','.join(
                [urllib.quote(url) for url in url_or_urls])
        else:
            query += '&url=%s' % urllib.quote(url_or_urls)

        url = 'http://api.embed.ly/%s/%s?%s' % (version, method, query)

        http = httplib2.Http(timeout=self.timeout)
        headers = {'User-Agent': self.user_agent}
        resp, content = http.request(url, headers=headers)

        if resp['status'] == '200':
            data = json.loads(content)

            # The raw body can only be attached to a dict payload; a
            # multi-url response decodes to a list, where item assignment
            # by string key would raise TypeError.
            if kwargs.get('raw', False) and isinstance(data, dict):
                data['raw'] = content
        else:
            error = {
                'type': 'error',
                'error': True,
                'error_code': int(resp['status'])
            }
            # For a multi-url call hand every url its own copy of the
            # error; pairing the urls with the bare dict would iterate over
            # the dict's keys instead.
            data = [dict(error) for _ in url_or_urls] if multi else error

        if multi:
            return [Url(item, method, requested)
                    for requested, item in zip(url_or_urls, data)]

        return Url(data, method, url_or_urls)
Esempio n. 3
0
def test_get_urls_list(client):
    """Listing endpoint returns every Url that was stored in redis."""
    stored = [
        Url(origin_url=fake.uri(), short_url='fake_url'),
        Url(origin_url=fake.uri(), short_url='fake_url_2'),
    ]
    for entry in stored:
        redis_cli.set(entry.redis_key, entry.json())

    response = client.get('/v1/url')

    assert response.status_code == 200
    for entry in stored:
        assert entry in response.json.get('urls')
Esempio n. 4
0
 def test_slug_shortening(self):
     """
     Slugs grow by one character for each Url sharing the same mark.

     Three Urls containing MOCK_MARK are saved in turn; the first must get
     the MIN_SLUG-character prefix of the mark and the second a prefix one
     character longer.  The third is saved but its slug is not asserted.
     """
     templates = (
         'http://lab.tmp.br/%s/index.html',
         'http://another.lab.tmp.br/%s/index.html',
         'http://yetanother.lab.tmp.br/%s/index.html',
     )
     saved = []
     for template in templates:
         entry = Url(url=template % MOCK_MARK)
         entry.save()
         saved.append(entry)

     first, second = saved[0], saved[1]
     self.assertEqual(first.slug, MOCK_MARK[:MIN_SLUG])
     self.assertEqual(second.slug, MOCK_MARK[:MIN_SLUG + 1])
Esempio n. 5
0
 def test_slug_shortening_failure(self):
     """
     Saving raises SlugCollision once no slug can be generated.

     Urls containing MOCK_MARK are saved until a slug reaches MAX_SLUG
     characters; saving one more such Url must then raise SlugCollision.
     """
     first = Url(url='http://lab.tmp.br/%s/index.html' % MOCK_MARK)
     first.save()
     current_slug = first.slug
     while len(current_slug) < MAX_SLUG:
         filler = Url(url='http://another.lab.tmp.br/%s/index%s.html' %
                      (MOCK_MARK, len(current_slug)))
         filler.save()
         current_slug = filler.slug

     overflow = Url(url='http://last.lab.tmp.br/%s/index%s.html' %
                    (MOCK_MARK, len(current_slug)))
     with self.assertRaises(SlugCollision):
         overflow.save()
Esempio n. 6
0
def shorten():
    """
    Return the hash and visit count for the URL posted in the form.

    Responds with a 400 and a plain-text message when the 'url' form field
    is missing or fails is_url().  Otherwise the normalized URL is looked
    up; when no row forwards to it yet, a new row with an unused hash is
    created and committed.
    """
    submitted = request.form.get('url')
    if not submitted:
        return 'No URL provided', 400
    if not is_url(submitted):
        return 'The URL is invalid', 400

    target = normalize_url(submitted)

    record = Url.query.filter_by(forward_to=target).first()
    if not record:
        # Keep drawing hashes until one is found that no row uses yet.
        while True:
            candidate = generate_hash()
            if not Url.query.filter_by(hash=candidate).first():
                break

        record = Url(hash=candidate, forward_to=target)
        db.session.add(record)
        db.session.commit()

    payload = {
        'hash': record.hash,
        'visited_times': record.visited_times
    }
    return payload, 200
Esempio n. 7
0
    def create_url():
        """
        Return the short form of the URL given in the JSON request body.

        Aborts with 422 when the request is not JSON or when the stripped
        'url' value fails is_valid_url().  An existing row for the same
        redirect target is reused; otherwise the next value of the
        "urls_id_seq" sequence becomes both the row id and the input of
        the emoji slug.
        """
        if not request.is_json:
            abort(422)

        target = request.json.get('url', '').strip()
        if not is_valid_url(target):
            abort(422)

        record = Url.query.filter(Url.redirect == target).first()
        if not record:
            new_id = db.session.execute(Sequence("urls_id_seq"))
            record = Url(id=new_id,
                         redirect=target,
                         slug=to_emoji_slug(new_id))
            db.session.add(record)
            db.session.commit()

        return jsonify({'shorter': record.get_full_short()})
Esempio n. 8
0
def main(request):
    """
    Serve the URL shortener form (GET) or shorten a submitted URL (POST).

    GET renders the submission form followed by a dump of every stored
    Url row.  POST reads the value after the first "=" of the raw request
    body, prefixes "http://" when no http/https scheme is present, stores
    the URL if it is not stored yet and renders its numeric abbreviation
    together with a meta refresh back to the host.  Any other method gets
    an HttpResponseForbidden.

    All request- and database-derived text is HTML-escaped before being
    placed into the page.
    """
    # Local import so this view does not depend on the file's import header.
    from django.utils.html import escape

    host = request.META['HTTP_HOST']

    if request.method == "GET":
        output = ("<form action='/' method='POST'>\n"
                  + "Introduce your url:"
                  + "<input type='text' name='url'/></br>\n"
                  + "<input type='submit' value='Submit' "
                  + "/></form>\n<br>\n<br>"
                  + escape(str(Url.objects.values_list())))

    elif request.method == "POST":
        # NOTE(review): assumes the body has the shape "url=<value>";
        # a body without "=" raises IndexError here - confirm callers.
        urlname = urllib.unquote(request.body.split("=")[1])
        if not urlname.startswith(("http://", "https://")):
            urlname = "http://" + urlname

        # Keep the fetched or freshly saved row so its id can be read
        # without querying the database again.
        try:
            entry = Url.objects.get(url=urlname)
        except Url.DoesNotExist:
            entry = Url(url=urlname)
            entry.save()

        output = ("You introduced: " + escape(str(urlname)) + "</br>\n"
                  + "The abbreviation is: /" + str(entry.id) + "</br>\n"
                  + "<meta http-equiv='Refresh' content='2;"
                  + "url=http://" + escape(host) + "'>")
    else:
        return HttpResponseForbidden("Method not allowed")

    return HttpResponse(output)
Esempio n. 9
0
 def get_currently_selected_subgenre(self):
     """Return the Url of the selected subgenre, or None if there is none."""
     tag = self._get_currently_selected_subgenre_tag()
     if not tag:
         return None
     return Url(tag.get('href'), tag.string)
Esempio n. 10
0
 def test_mock_generation(self):
     """
     A saved Url containing MOCK_MARK gets the mark's MIN_SLUG-character
     prefix as its slug.
     """
     expected_slug = MOCK_MARK[:MIN_SLUG]
     entry = Url(url='http://lab.tmp.br/%s/index.html' % MOCK_MARK)
     entry.save()
     self.assertEqual(entry.slug, expected_slug)
Esempio n. 11
0
def main(request, **kwargs):
    """
    Render "main.html" with the URL form, saving a Url on a valid POST.

    A POST binds UrlForm to the submitted data; when the form validates,
    a new Url with the cleaned 'url' value is saved and added to the
    template context under "url".  Every other request gets an unbound
    form.  The context always contains the form under "form".
    """
    is_post = request.method == "POST"
    form = UrlForm(request.POST) if is_post else UrlForm()
    context = {"form": form}

    if is_post and form.is_valid():
        shortened = Url()
        shortened.original_url = form.cleaned_data['url']
        shortened.save()
        context["url"] = shortened

    return render_to_response("main.html",
                              context,
                              context_instance=RequestContext(request))
Esempio n. 12
0
def make_it(original_url):
    """
    Create and store a Url for original_url and return it as JSON.

    The new object gets original_url as ``original`` and the result of
    hashfunc() as ``short_url`` before it is registered with and saved
    through models.storage.
    """
    record = Url()
    record.original = original_url
    record.short_url = hashfunc()
    models.storage.new(record)
    models.storage.save()
    return jsonify(record.to_dict())
Esempio n. 13
0
def index():
    """
    Create an alias -> origin mapping from the JSON body of a POST.

    The body may carry 'alias' and 'origin'.  For POST requests:
      * a supplied alias that already exists yields a 410 "Alias repeat";
      * a missing origin yields a 400 "Validation Failed";
      * a missing alias is replaced by a random, unused 6-character
        alias made of ASCII letters and digits;
      * otherwise the Url row is added and committed.
    Requests with any other method just receive the 200 "ok." payload.

    A body that is absent or is not a JSON object is treated as empty
    instead of raising AttributeError on ``.get``.
    """
    def alias_taken(candidate):
        """Return a truthy value when a Url row already uses candidate."""
        return db.session.query(
            db.exists().where(Url.alias == candidate)).scalar()

    body = request.get_json()
    if not isinstance(body, dict):
        # get_json() may return None (or a list/scalar); fall back to {}.
        body = {}
    alias = body.get('alias')
    origin = body.get('origin')

    if request.method != 'POST':
        return {'code': 200, 'message': 'ok.'}

    if alias is not None and alias_taken(alias):
        return {'code': 410, 'message': 'Alias repeat'}, 410

    if origin is None:
        return {'code': 400, 'message': 'Validation Failed'}, 400

    if alias is None:
        chars = string.ascii_letters + string.digits
        length = 6
        # Draw random aliases until one is found that is not in use.
        while True:
            alias = ''.join(choice(chars) for _ in range(length))
            if not alias_taken(alias):
                break

    url = Url(alias=alias, origin=origin)
    db.session.add(url)
    db.session.commit()
    return {'code': 200, 'message': 'ok.'}
Esempio n. 14
0
def api_add_url():
    """
    Validate the request input and insert a new Url for the current user.

    Reads 'url', 'frequent', 'top_num' and 'summary' from the request
    input.  Raises APIValueError when url is empty, or when a non-empty
    frequent / top_num does not match _RE_FREQUENT / _RE_TOP_NUM.  An
    empty frequent defaults to 30 and an empty top_num to 1.  Returns the
    inserted Url.
    """
    # NOTE: the check_admin() call is commented out in this handler.
    params = ctx.request.input(url='', frequent='', top_num='', summary='')
    address = params.url.strip()
    frequent = params.frequent.strip()
    top_num = params.top_num.strip()

    if not address:
        raise APIValueError('url', 'url cannot be empty.')
    if frequent and not _RE_FREQUENT.match(frequent):
        raise APIValueError('frequent', 'frequent MUST be num. or empty')
    if top_num and not _RE_TOP_NUM.match(top_num):
        raise APIValueError('top_num', 'top_num must be 1-999  or empty.')

    owner = ctx.request.user
    record = Url(user_id=owner.id,
                 url=address,
                 # An empty string is the only falsy value left at this
                 # point, so "or" applies the defaults.
                 frequent=frequent or 30,
                 top_num=top_num or 1,
                 summary=params.summary)
    record.insert()
    return record
Esempio n. 15
0
def query_bitly(longUrl, user):
    """
    Shorten longUrl through the service behind JMP_URL and store the result.

    "http://" is prepended when neither longUrl nor its unquoted form
    starts with an http/https scheme.  On success a Url entity recording
    the long url, the short url and the creator is stored and the short
    url is returned; on any failure a user-facing "Sorry! ..." message is
    returned instead.
    """
    candidates = (longUrl, urllib.unquote(longUrl))
    has_scheme = any(
        text[:7].lower() == 'http://' or text[:8].lower() == 'https://'
        for text in candidates)
    if has_scheme:
        quoted = urllib.quote(longUrl, '')
    else:
        quoted = urllib.quote('http://' + longUrl, '')

    result = urlfetch.fetch(JMP_URL + quoted)
    logging.debug('posted to bit.ly: %s', quoted)
    if result.status_code != 200:
        return 'Sorry! Query failed.'

    data = json.JSONDecoder().decode(result.content)
    api_status = data.get('status_code')
    if api_status == 403:
        logging.warning('RATE LIMIT EXCEEDED')
        return 'Sorry! Experiencing rate limits from bit.ly'
    if api_status != 200:
        logging.error(result.content)
        return 'Sorry! bit.ly did not accept the query. Make sure that your message only contains a URL.'

    payload = data.get('data')
    short = payload.get('url')
    record = Url(longUrl=payload.get('long_url'),
                 shortUrl=short,
                 creator=user)
    record.put()
    return short
Esempio n. 16
0
def shorten():
    """
    Shorten the 'url' query argument and answer in the requested format.

    Query arguments: 'url', 'token' and 'format' ("simple" by default).
    When rate_limit_exceeded() reports a hit, html clients are redirected
    with the rate-limit page flashed and everyone else gets a 429.
    Otherwise the Url and an Ip log entry are saved, and the https root
    url plus the encoded Url id is returned as a flashed html redirect,
    as JSON, or as plain text.
    """
    params = request.args
    long_url = params.get("url")
    token = params.get("token")
    response_format = params.get("format", "simple")
    client_ip = request.headers.get("X-Forwarded-For")
    wants_html = response_format == "html"

    if rate_limit_exceeded(client_ip, token):
        if not wants_html:
            abort(429)
        return redirect_and_flash(
            render_template("rate_limit_exceeded.html"))

    record = Url(url=long_url)
    record.save()

    Ip(ip=client_ip, token=token, time=datetime.now()).save()

    base = url_for("index", _external=True, _scheme="https")
    new_url = base + short_url.encode_url(record.id)

    print(new_url)

    if wants_html:
        return redirect_and_flash(
            render_template("new_url.html", new_url=new_url))
    if response_format == "json":
        return jsonify(url=new_url)
    return new_url
Esempio n. 17
0
def create():
    """
    Register origin_url under a freshly generated short url.

    Up to five short urls are tried; each is written to redis with
    ``nx=True`` so an existing key is never overwritten, and with the
    Url's SHORT_URL_EXPIRE_SECONDS as expiry.

    Raises ApiException when 'origin_url' is missing, when Url validation
    fails, or when every attempt collided with an existing key.
    """
    origin_url = request.values.get('origin_url', None)
    if not origin_url:
        raise ApiException('"origin_url" is required')

    max_attempts = 5
    for _ in range(max_attempts):
        try:
            candidate = Url(short_url=Url.gen_short_url(),
                            origin_url=origin_url)
        except ValidationError as e:
            raise ApiException(str(e))

        stored = redis_cli.set(candidate.redis_key,
                               candidate.json(),
                               nx=True,
                               ex=candidate.SHORT_URL_EXPIRE_SECONDS)
        if stored:
            break
    else:
        # Every attempt hit a key that already exists.
        raise ApiException('Collision happened. Please try again.')

    return jsonify({'short_url': candidate.short_url})
Esempio n. 18
0
async def test(url: UrlSchema):
    """
    Shorten the long URL described by the request schema.

    Uses the schema's customCode as short code when it is set, otherwise a
    random 8-character ShortUUID.  The short URL is BASE_URL joined to the
    code with a single "/".

    Raises HTTPException 400 when the short code is already in use and
    HTTPException 500 when storing the document fails.
    """
    payload = dict(url)

    if payload["customCode"]:
        shortCode = payload["customCode"]
    else:
        shortCode = shortuuid.ShortUUID().random(length=8)

    # Join with "/" explicitly: os.path.join uses the OS path separator and
    # drops the base entirely when the second part starts with "/".
    shortUrl = config("BASE_URL").rstrip("/") + "/" + shortCode

    existing = Url.objects(shortCode=shortCode)
    if len(existing) != 0:
        raise HTTPException(status_code=400,
                            detail="Short code is invalid, It has been used.")

    try:
        document = Url(longUrl=payload["longUrl"],
                       shortCode=shortCode,
                       shortUrl=shortUrl)

        document.save()

        return {
            "message": "Successfully shortened URL.",
            "shortUrl": shortUrl,
            "longUrl": document["longUrl"]
        }
    except Exception as e:
        print(e)
        raise HTTPException(status_code=500,
                            detail="An unknown error occurred.")
Esempio n. 19
0
def crawler(url=None, depth=1):
    """
    Record url, fetch it and schedule a crawl of its same-site links.

    Stops immediately when depth is negative or the url is already
    stored.  Otherwise the url is committed, its page is downloaded and
    parsed, every link starting with the value of get_netloc(url) is
    queued through crawler.delay() with depth - 1, and the parsed page is
    handed to parse_html().  A page that cannot be fetched or decoded is
    reported on stdout and skipped.
    """
    print('Start url:' + str(url))
    if depth < 0:
        return

    already_stored = Url.query.filter(Url.url == url).count() != 0
    if already_stored:
        return

    record = Url(url)
    db.session.add(record)
    db.session.commit()

    try:
        html = urllib2.urlopen(url).read().decode('utf8')
    except (ValueError, urllib2.HTTPError, UnicodeError):
        print('ERROR: Can\'t get html from url')
        return

    print('Parse links')
    soup = BeautifulSoup(html, 'html.parser')
    netloc = get_netloc(url)

    for anchor in soup.find_all('a', href=True):
        target = anchor['href']
        if not target.startswith('http'):
            # Resolve relative links against the page url.
            target = urlparse.urljoin(url, target)
        if target.startswith(netloc):
            crawler.delay(target, depth=depth - 1)

    parse_html(url, soup)
Esempio n. 20
0
    def post(self):
        """
        Shorten a url for the current user.

        Request arguments: 'url' (required), 'group' (required, stored as
        group_id) and an optional custom 'short_url'.

        Returns a 403 message when the user already shortened this url or
        when the requested custom short path is taken.  Otherwise the new
        Url is committed and returned marshalled under 'data' with status
        201.  When no custom short path is supplied (empty or omitted) a
        random one of 4 to 8 characters is produced via self.shorten().
        """
        self.parser.add_argument('url', type=url_validator, required=True,
                                 help='url is invalid')
        self.parser.add_argument('group', type=url_group, required=True,
                                 help='invalid group id', dest='group_id')
        self.parser.add_argument(
            'short_url', type=short_url,
            help='short url must be at least 3 characters in length')

        args = self.parser.parse_args()

        url = Url.query.filter(
            (Url.path == args['url']) & (Url.user_id == g.user.id)).first()

        if url is not None:
            message = '{0} has been shortened'.format(args['url'])
            return {'message': message}, 403

        # An omitted argument arrives as None; treat it like an empty
        # string so it is not stored as a "custom" short path of None.
        if not args['short_url']:
            custom = False
            length = random.randint(4, 8)
            short_path = self.shorten(length, g.user.id)
        else:
            custom = True
            short_path = args['short_url']
            url = Url.query.filter((Url.short_path == short_path)).first()

            if url is not None:
                message = '{0} is not available'.format(short_path)
                return {'message': message}, 403

        url = Url(group_id=args['group_id'], user_id=g.user.id,
                  path=args['url'], short_path=short_path, custom=custom)
        db.session.add(url)
        db.session.commit()

        return marshal(url, self.urlField, envelope='data'), 201
Esempio n. 21
0
def extract_from_url(url):
    """Fetch url and return its Url record plus the cleaned page text.

    The page is requested with redirects enabled and a 10 second timeout,
    decoded as utf-8 and parsed with BeautifulSoup.  script, style and
    meta elements are removed before the text is extracted; leading and
    trailing whitespace of every line is collapsed to a single space.
    The Url record carries the page title ('Untitled' when the title tag
    has no string) and a 100-character snippet with commas replaced by
    dashes.

    Returns a tuple ``(Url, body_text)``.

    Raises ValueError when the response status is not 200, when the page
    has no title tag, or when url does not start with 'http'.  These
    cases previously ended in an UnboundLocalError.
    """
    req = requests.get(url, allow_redirects=True, timeout=10)
    req.encoding = 'utf-8'
    if req.status_code != requests.codes.ok:
        logging.warning(
            "Warning: %s has a status code of: %s omitted from database.\n",
            req.url, req.status_code)
        raise ValueError('%s returned status code %s' %
                         (req.url, req.status_code))

    bs_obj = BeautifulSoup(req.text, "lxml")
    if not hasattr(bs_obj.title, 'string') or not url.startswith('http'):
        raise ValueError('Cannot extract a titled page from %s' % url)

    title = bs_obj.title.string
    if title is None:
        title = u'Untitled'

    # Drop elements whose content is not part of the readable text.
    checks = ['script', 'style', 'meta', '<!--']
    for chk in bs_obj.find_all(checks):
        chk.extract()

    body = bs_obj.get_text()
    pattern = re.compile(r'(^[\s]+)|([\s]+$)', re.MULTILINE)
    body_str = re.sub(pattern, " ", body)
    snippet = body_str[:100].replace(',', '-')

    u = Url(url=url, title=title, snippet=snippet)
    logging.info("Processed %s ...", url)
    logging.debug("%s %s", u.title, body_str)
    return u, body_str
Esempio n. 22
0
def shortener(request):
    """
    Serve the shortener form with all stored URLs (GET) or shorten one (POST).

    GET lists every stored Url with its id below the submission form.
    POST takes the value of the first field of the raw request body (the
    text between the first "=" and the next "&"), stores it if it is not
    stored yet, and renders a link labelled with the Url's id.  Any other
    method receives a 405 response.

    Stored and submitted URLs are HTML-escaped before being placed into
    the page.
    """
    # Local import so this view does not depend on the file's import header.
    from django.utils.html import escape

    if request.method == "GET":
        urlDic = ""
        for entry in Url.objects.all():
            urlDic += "URL " + escape(str(entry.url)) + " Shortened URL " + str(
                entry.id) + "<br/>"

        resp = "<body><html> <form id= shortUrl method= post> \
                <fieldset><legend>URL shortener</legend><label> Url</label> \
                <input id= campo1 name= Url type= text /></label> \
                <input id= campo2 name= pressbutton type= submit value= Shorten URL/> \
                </fieldset> </form> <p> URL Dictionary </p>" \
                + urlDic + "</body></html>"

    elif request.method == "POST":
        # NOTE(review): assumes a body of the shape "Url=<value>&..."; a
        # body without "=" raises IndexError here - confirm callers.
        url = request.body.split("=")
        url = url[1].split("&")
        url = url[0]

        # Keep the model instance in its own name: rebinding ``url`` to it
        # broke the string concatenation below for already-stored URLs.
        try:
            entry = Url.objects.get(url=url)
        except Url.DoesNotExist:
            entry = Url(url=url)
            entry.save()
        urlId = str(entry.id)

        safe_url = escape(url)
        resp = "<html><body>URL " + safe_url + " Shortened URL \
                <a href= http://" + safe_url + ">" + urlId + "</a> \
                </body></html>"

    else:
        # Without this branch ``resp`` would be unbound below.
        return HttpResponse("Method not allowed", status=405)

    return HttpResponse(resp)
Esempio n. 23
0
def barra(request):
    """
    Serve the shortener form with all stored URLs (GET) or shorten one (POST).

    GET returns the form followed by formatUrlHtml() output for every
    stored Url.  POST reads the 'url' field, prefixes "http://" when no
    http/https scheme is present, reuses or creates the matching Url and
    returns its formatted html; an empty field yields an error message.
    Any other method gets a 405 response whose Allow header lists GET and
    POST.
    """
    formul = '<br><form action="" method="POST" accept-charset="UTF-8">' + \
        'URL para acortar: <input type="text" name="url">' + \
        '<input type="submit" value="Acorta!"></form><hr>'
    srvHost = str(request.META["SERVER_NAME"])
    srvPort = str(request.META["SERVER_PORT"])

    if request.method == "GET":
        urlshtml = "".join(formatUrlHtml(url, srvHost, srvPort)
                           for url in Url.objects.all())
        return HttpResponse(formul + urlshtml)

    if request.method == "POST":
        longUrl = request.POST.get("url", "")
        if longUrl == "":
            salida = "Incorrect post or empty url"
        else:
            if not longUrl.startswith(("http://", "https://")):
                longUrl = "http://" + longUrl
            try:
                newUrl = Url.objects.get(long_url=longUrl)
            except Url.DoesNotExist:
                newUrl = Url(long_url=longUrl)
                newUrl.save()
            salida = formatUrlHtml(newUrl, srvHost, srvPort)
        return HttpResponse(salida)

    # The first argument of HttpResponseNotAllowed is the list of permitted
    # methods (used for the Allow header); the message goes in as content.
    return HttpResponseNotAllowed(["GET", "POST"],
                                  "Method not allowed in this server")
Esempio n. 24
0
def test_get_origin_url(client):
    """Looking up a stored short url returns its origin url."""
    stored = Url(origin_url=fake.uri(), short_url='fake_url')
    redis_cli.set(stored.redis_key, stored.json())

    response = client.get(f'/v1/url/{stored.short_url}')

    assert response.status_code == 200
    returned_origin = response.json.get('origin_url')
    assert returned_origin == stored.origin_url
Esempio n. 25
0
 def get_top_level_genre_urls(self):
     """Return a list with one Url per top level genre tag on this page."""
     return [Url(anchor.get('href'), anchor.string)
             for anchor in self.get_top_level_genre_tags()]
Esempio n. 26
0
def report_url(request):
    """
    Store the URL posted in the 'url' field.

    Responds with "SUCCESS" when the Url was saved and with "ERROR" when
    the field is missing or saving fails.  Requests with a method other
    than POST receive "ERROR" with status 405 instead of the view
    returning None.
    """
    if request.method != 'POST':
        return HttpResponse("ERROR", status=405)

    url = request.POST.get('url')
    if url is None:
        # A missing field previously raised outside the try block.
        return HttpResponse("ERROR")

    try:
        newUrl = Url(url=url)
        newUrl.save()
    except Exception:
        return HttpResponse("ERROR")
    return HttpResponse("SUCCESS")
Esempio n. 27
0
 def _get_or_create_url(self, url):
     """
     Return the stored Url matching url, creating and committing it when
     no such row exists yet.
     """
     matches = Url.query.filter(Url.url == url)
     if matches.count() != 0:
         return matches[0]

     record = Url(url)
     db.session.add(record)
     db.session.commit()
     return record
Esempio n. 28
0
    def run(self):
        """
        Seed an initial domain when none exists, then process every domain.

        With an empty Domain collection a 'sametmax.com' domain holding the
        root path is saved first.  Afterwards self._find_links() is called
        for each stored Domain.
        """
        # Fixtures
        if not Domain.objects.count():
            seed = Domain(scheme='http', netloc='sametmax.com')
            seed.urls.append(Url(path='/'))
            seed.save()

        for domain in Domain.objects:
            self._find_links(domain)
Esempio n. 29
0
 def get_currently_selected_letter(self):
     """
     Return the Url for the current letter, or None if none is selected.

     The "selected" anchor is searched under the element two levels above
     the first letter tag.
     """
     letter_tags = self.get_letter_tags()
     if not letter_tags:
         return None

     container = letter_tags[0].parent.parent
     anchor = container.find("a", class_="selected")
     if not anchor:
         return None
     return Url(anchor.get('href'), anchor.string)
Esempio n. 30
0
def shortenURL(actualUrl):
    """
    Make sure a Url exists for the short form of actualUrl.

    The short form comes from parseURL().  Returns ``(created, shortURL)``
    where created is True when a new Url had to be saved and False when
    one with that short form already existed.
    """
    shortURL = parseURL(actualUrl)
    try:
        Url.objects.get(shortURL=shortURL)
    except Url.DoesNotExist:
        Url(actualUrl=actualUrl, shortURL=shortURL).save()
        created = True
    else:
        created = False
    return created, shortURL