Example #1
0
 def test_create_url(self):
     """Content added to a URL must be returned by that URL's get_content()."""
     page = Page()
     page.save()
     url = URL(TEST_URL)
     url.add_content(page)
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn(page.content_ptr, url.get_content(),
                   "URL does not point to correct content.")
Example #2
0
def acortar(request):
    """List the stored URLs (GET) or register a URL to shorten (POST).

    GET renders ``pagina_principal.html`` with every stored ``URL``.

    POST reads the ``url`` form field, prepends ``http://`` when the value
    has no ``http://``/``https://`` prefix, reuses the ``URL`` row with that
    address or creates one, and renders ``pagina_seleccion.html`` with the
    row id.  A missing or empty field produces a 400 response rendered from
    ``pagina_error.html``.

    Any other HTTP method renders ``pagina_error.html`` with a generic
    message.
    """
    if request.method == "GET":
        plantilla_principal = get_template("pagina_principal.html")
        contexto = {'lista_urls': URL.objects.all()}
        return HttpResponse(plantilla_principal.render(contexto))

    if request.method == "POST":
        url = request.POST.get("url")
        # .get() returns None when the field is absent; treat that like an
        # empty submission instead of failing on None.startswith() below.
        if not url:
            plantilla_error = get_template("pagina_error.html")
            contexto = {'mensaje': "Está vacío"}
            return HttpResponseBadRequest(plantilla_error.render(contexto))

        if not url.startswith(("http://", "https://")):
            url = "http://" + url

        try:
            nueva_URL = URL.objects.get(direccion_URL=url)
        except URL.DoesNotExist:
            nueva_URL = URL(direccion_URL=url)
            nueva_URL.save()

        plantilla_seleccion = get_template("pagina_seleccion.html")
        # The template key stays 'id'; only the local no longer shadows
        # the builtin.
        contexto = {'id': nueva_URL.id}
        return HttpResponse(plantilla_seleccion.render(contexto))

    plantilla_error = get_template("pagina_error.html")
    contexto = {'mensaje': "Algo ha ido mal"}
    return HttpResponse(plantilla_error.render(contexto))
Example #3
0
 def test_lookup_by_url(self):
     """A URL fetched back by its address must still return the added content."""
     page = Page()
     page.save()
     url = URL(TEST_URL)
     url.add_content(page)
     found = URL.objects.get(url=TEST_URL)
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn(page.content_ptr, found.get_content(),
                   "Unable to find correct content by URL.")
def generate_hash():
    """Create a short hash for the URL given in the JSON request body.

    The body must be a JSON object whose ``url`` value is a string starting
    with ``http://`` or ``https://``.

    Returns:
        A ``(body, status)`` tuple: status 200 with the shortened URL built
        from the request's base URL and the hash returned by
        ``URL.save_hash``, or status 400 with an explanatory message when
        the body is missing, malformed, or holds an unacceptable URL.
    """
    # silent=True yields None for a missing or malformed JSON body instead
    # of raising, so the validation below can answer with the usual 400.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or not isinstance(payload.get('url'), str):
        return {
            'status': 400,
            'message': 'url either not found in body or invalid'
        }, 400

    long_url = payload['url']

    # Require the full "scheme://" prefix; comparing only the text before
    # "://" also accepted a bare "http" or "https" with no separator.
    if not long_url.startswith(('http://', 'https://')):
        return {
            'status':
            400,
            'message':
            'Invalid url in request body. URL must start with http or https'
        }, 400

    base_url = request.base_url.replace('/generate', '')
    url_hash = URL.save_hash(long_url)
    return {
        'status': 200,
        'message': 'URL generated successfully',
        'url': f'{base_url}/{url_hash}'
    }, 200
Example #5
0
def insert_url(url:str, code:str) -> int:
    """Store a URL with its short code and start its usage counter at zero.

    Returns the value produced by executing the URL insert; the same value
    is used as the ``url`` reference of the new ``Stats`` row.
    """
    new_url_id = URL.insert(url=url, code=code).execute()

    stats_insert = Stats.insert(url=new_url_id, usage_count=0)
    stats_insert.execute()

    return new_url_id
Example #6
0
def parse_urls(session):
    """Load logged URL events from the URL log file into ``session``.

    The log file is ``cfg.URLLOG`` inside ``cfg.CURRENT_DIR``.  Each line is
    parsed with ``ast.literal_eval`` and must provide the keys ``time``,
    ``browser``, ``title``, ``url`` and ``event``; a ``URL`` object built
    from them is added to ``session``.  Lines that cannot be processed are
    written back to the start of the file and everything else is removed,
    so failed lines are retried on the next call.

    Does nothing when the log file does not exist.
    """
    urlfile = os.path.join(os.path.expanduser(cfg.CURRENT_DIR), cfg.URLLOG)

    # TODO may need to check if file is already open using a file lock
    if not os.path.isfile(urlfile):
        return

    lines_to_save = []
    # The context manager closes the file even if rewriting it fails.
    with open(urlfile, 'r+') as f:
        for line in f:
            try:
                text = ast.literal_eval(line.rstrip())
                url = URL(unicode(text['time']), unicode(text['browser']),
                          unicode(text['title']), unicode(text['url']),
                          unicode(text['event']))
                session.add(url)
            except Exception:
                # Deliberately broad: any bad line is kept for a retry.
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                print("Could not save " + str(line) + " to the database. Saving for the next round of parsing.")
                lines_to_save.append(line)

        # Keep only the failed lines: rewrite them from the start of the
        # file and cut off whatever is left after them.
        f.seek(0)
        f.writelines(lines_to_save)
        f.truncate()
Example #7
0
def add(request):
    """Show and process the form that adds a link to the user's profile.

    On a valid POST a ``URL`` of type ``URL.PERFORMANCES`` is saved for the
    requesting user's profile and the client is redirected to
    ``/links/<profile name>/``.  On GET, or when the submitted form is
    invalid, ``add_user_link.html`` is rendered with the form and all tags.
    """
    if request.method == 'POST':
        form = AddUserLinkForm(request.POST)
        if form.is_valid():
            cd = form.cleaned_data
            link = cd['link']
            # Add a scheme only when none is present; checking for
            # "http://" alone turned https links into "http://https://...".
            if not link.startswith(('http://', 'https://')):
                link = 'http://' + link

            profile = request.user.get_profile()

            url = URL()
            url.name = cd['label']
            url.other_profile = link
            url.user = profile
            url.type = URL.PERFORMANCES
            url.save()
            profile.save()
            return HttpResponseRedirect('/links/' + profile.name + '/')
    else:
        form = AddUserLinkForm()
    return render_to_response(
        'add_user_link.html',
        {
            'form':     form,
            'all_tags': Tag.objects.all()
        },
        RequestContext(request))
Example #8
0
def acorta(request):
    """Render the shortener form (GET) or shorten a submitted URL (POST).

    GET renders ``plantilla.html``.

    POST reads the ``original`` form field and prepends ``http://`` unless
    the value already starts with ``http://`` or ``https://``.  If a ``URL``
    row with that address exists, ``realizada.html`` is rendered with it;
    otherwise a new row is stored whose short value is the current row
    count plus one, and ``acortada.html`` is rendered.  A POST without the
    ``original`` field gets an empty 400 response.

    Any other HTTP method gets an empty 405 response.
    """
    if request.method == 'GET':
        template = get_template('plantilla.html')
        return HttpResponse(template.render({}))

    if request.method == 'POST':
        url_original = request.POST.get('original')
        if url_original is None:
            # Field absent from the form data: client error, not a crash.
            return HttpResponse(status=400)

        if not url_original.startswith(("http://", "https://")):
            url_original = "http://" + url_original

        try:
            page = URL.objects.get(original=url_original)
        except URL.DoesNotExist:
            # count() asks the database for the number of rows instead of
            # loading every row just to measure the list.
            corta = URL.objects.count() + 1
            URL(original=url_original, cortada=corta).save()
            template = get_template('acortada.html')
            context = {'original': url_original, 'cortada': corta}
        else:
            template = get_template('realizada.html')
            context = {'original': page.original, 'cortada': page.cortada}

        return HttpResponse(template.render(context))

    # Previously fell off the end and returned None for other methods.
    return HttpResponse(status=405)
def redirect_url(url_hash):
    """Redirect to the long URL registered for ``url_hash``.

    Returns a ``(response, status)`` tuple: the redirect with status 302
    when ``URL.get_url`` yields a truthy value, otherwise an error body
    with status 400.
    """
    target = URL.get_url(url_hash)

    if target:
        return redirect(target), 302

    error_body = {
        'status': 400,
        'message': 'URL does not exist. Please try another one.'
    }
    return error_body, 400
Example #10
0
def shorten_url(url: str):
    """Return the short redirect address for ``url``, storing it if new.

    ``http://`` is prepended when the URL has no ``http(s)://`` prefix and
    ``www.`` is prepended to the host when missing.  URLs are identified by
    host and path only: a stored row with the same pair is reused,
    otherwise a row keyed by the SHA-256 hex digest of ``host + path`` is
    created and committed.

    Returns:
        ``'localhost:5000/shorten/redirect/'`` followed by the row's hash.
    """
    if not url.startswith(('https://', 'http://')):
        url = 'http://' + url
    parsed_url = urlparse(url)
    domain = parsed_url.netloc
    if not domain.startswith('www.'):
        domain = 'www.' + domain
    path = parsed_url.path

    # Only one row is needed, so let the database stop at the first match
    # instead of loading every matching row.
    existing = URL.query.filter(URL.domain == domain,
                                URL.path == path).first()
    if existing is not None:
        return 'localhost:5000/shorten/redirect/' + existing.hash

    # Hash only when a new row is actually created.
    sha_signature = hashlib.sha256((domain + path).encode()).hexdigest()
    obj = URL(domain=domain, path=path, hash=sha_signature, original_url=url)
    db.session.add(obj)
    db.session.commit()

    return 'localhost:5000/shorten/redirect/' + obj.hash
Example #11
0
    def post(self):
        """Scrape the submitted product page and store its Item and URL.

        Fetches the page named by the ``content`` request parameter, reads
        the SKU from the ``span`` whose id is ``displaySkuCode`` and uses it
        as the id of an ``Item`` and a ``URL`` entity under the ``Website``
        key built from the ``website`` parameter.  Title, image, sale price
        and regular price are copied from the page when present; category
        and recipient come from the request.  Redirects to ``/listings``.

        Responds with status 400 when the page has no SKU.
        """
        content_url = self.request.get('content')
        page = urlfetch.fetch(content_url)
        doc = lxml.html.fromstring(page.content)

        # Start from None so a page without the SKU span is detected
        # instead of hitting an unbound local further down.
        item_id = None
        for sku in doc.cssselect('span'):
            if sku.get('id') == 'displaySkuCode':
                item_id = sku.text
        if not item_id:
            # NOTE(review): assumes a webapp2-style handler exposing
            # error(); confirm against the handler base class.
            self.error(400)
            return

        website_key = ndb.Key("Website", self.request.get('website'))

        # Entities are created under website_key, so the lookup must use
        # the same parent; without it the existing entity is never found.
        registry_item = Item.get_by_id(item_id, parent=website_key)
        if registry_item is None:
            registry_item = Item(id=item_id, parent=website_key)

        for tag in doc.cssselect('title'):
            registry_item.title = tag.text.encode('utf-8')

        for img in doc.cssselect('link'):
            if img.get('rel') == 'image_src':
                registry_item.img = img.get('href')

        for price in doc.cssselect('div'):
            if price.get('class') == 'sale-price':
                for savings in price.cssselect('strong'):
                    registry_item.sale = savings.text.encode('utf-8')
            if price.get('class') == 'reg-price':
                registry_item.price = price.text.encode('utf-8').replace(
                    "Reg.", "")

        registry_item.category = self.request.get('category')
        registry_item.for_who = self.request.get('for_who')
        registry_item.put()

        link = URL.get_by_id(item_id, parent=website_key)
        if link is None:
            link = URL(id=item_id, parent=website_key)
        link.url = content_url
        link.last_scrape = datetime.datetime.now()
        link.put()

        self.redirect('/listings')
Example #12
0
  def post(self):
    """Scrape the submitted product page and store its Item and URL.

    Fetches the page named by the ``content`` request parameter, reads the
    SKU from the ``span`` whose id is ``displaySkuCode`` and uses it as the
    id of an ``Item`` and a ``URL`` entity under the ``Website`` key built
    from the ``website`` parameter.  Title, image, sale price and regular
    price are copied from the page when present; category and recipient
    come from the request.  Redirects to ``/listings``.

    Responds with status 400 when the page has no SKU.
    """
    content_url = self.request.get('content')
    page = urlfetch.fetch(content_url)
    doc = lxml.html.fromstring(page.content)

    # Start from None so a page without the SKU span is detected instead
    # of hitting an unbound local further down.
    item_id = None
    for sku in doc.cssselect('span'):
      if sku.get('id') == 'displaySkuCode':
        item_id = sku.text
    if not item_id:
      # NOTE(review): assumes a webapp2-style handler exposing error();
      # confirm against the handler base class.
      self.error(400)
      return

    website_key = ndb.Key("Website", self.request.get('website'))

    # Entities are created under website_key, so the lookup must use the
    # same parent; without it the existing entity is never found.
    registry_item = Item.get_by_id(item_id, parent=website_key)
    if registry_item is None:
      registry_item = Item(id=item_id, parent=website_key)

    for tag in doc.cssselect('title'):
      registry_item.title = tag.text.encode('utf-8')

    for img in doc.cssselect('link'):
      if img.get('rel') == 'image_src':
        registry_item.img = img.get('href')

    for price in doc.cssselect('div'):
      if price.get('class') == 'sale-price':
        for savings in price.cssselect('strong'):
          registry_item.sale = savings.text.encode('utf-8')
      if price.get('class') == 'reg-price':
        registry_item.price = price.text.encode('utf-8').replace("Reg.", "")

    registry_item.category = self.request.get('category')
    registry_item.for_who = self.request.get('for_who')
    registry_item.put()

    link = URL.get_by_id(item_id, parent=website_key)
    if link is None:
      link = URL(id=item_id, parent=website_key)
    link.url = content_url
    link.last_scrape = datetime.datetime.now()
    link.put()

    self.redirect('/listings')
Example #13
0
            session.commit()

            #
            # if it's not a retweet
            #
            if not parentTweet:
                links = re.findall(r"http:\/\/t.co\/[a-zA-Z0-9\-\.]+", text)
                if links:
                    #
                    # let's remove the duplicates
                    #
                    links = list(set(links))
                    for link in links:
                        #
                        # if it's a real link
                        #
                        if is_ascii(link):
                            fetchedURL = session.query(URL).filter(
                                URL.shortAddress == link).first()
                            if not fetchedURL:
                                l = URL(link, u.ID)
                                session.add(l)
                                fetchedURL = l

                            fetchedURL.tweets.append(u)

            session.commit()
            count += 1
            if count % 1000 == 0:
                print "processed " + str(count) + " tweets"
Example #14
0
def parse_geometries(session, activity_tracker):
	"""Replay the window-geometry log file into the database.

	The log file is cfg.GEOLOG inside cfg.CURRENT_DIR. Each line is parsed
	with ast.literal_eval and must provide the keys 'time' and 'geometry'
	(the arrangement: a dict of apps, each with 'name', 'active' and
	'windows'; windows carry 'name', 'active', 'bounds' and optionally
	'tabs').

	For every arrangement that differs from the previous one:
	  * missing App, Window, Geometry and URL rows are added and committed
	    through activity_tracker.storage.sqlcommit();
	  * the ids derived for them are written back into the arrangement dict
	    ('pid', 'wid', 'gid', 'uid');
	  * WindowEvent / URLEvent objects are added to the session for items
	    that opened or became active compared with the previous
	    arrangement, and for items of the previous arrangement that closed
	    or became inactive;
	  * an Arrangement holding str(arrangement) is added to the session.

	Lines whose processing raises are written back to the start of the file
	for the next run; all other lines are removed from the file.

	session: object used for query() and add() calls (presumably a
		SQLAlchemy session -- confirm against the caller).
	activity_tracker: object whose storage.sqlcommit() is called after each
		newly added App, Window, Geometry or URL.
	"""
	# get names of file to read and the last arrangement
	geofile = os.path.join(os.path.expanduser(cfg.CURRENT_DIR), cfg.GEOLOG)
	q = session.query(Arrangement).order_by(Arrangement.id.desc()).first()
	last_arr = ast.literal_eval(q.arr) if q else {}

	# clean up dictionary for equal comparison
	# NOTE: this loop runs outside the try below, so a stored arrangement
	# lacking one of these keys raises KeyError out of this function.
	# NOTE(review): the 'uid' written into tabs further down is not removed
	# here -- confirm whether the duplicate check is meant to ignore it.
	for a, va in last_arr.iteritems():
		del va['pid']
		for w, vw in va['windows'].iteritems():
			del vw['wid']
			del vw['gid']

	# check if db file
	if os.path.isfile(geofile):
		f = open(geofile, 'r+')
		lines_to_save = []

		# get existing apps from the database
		apps = session.query(App).all()
		app_names = [a.name for a in apps]

		# get existing windows from the database
		windows = session.query(Window).all()
		window_names = [w.title for w in windows]

		# get existing geometries from the database
		geometries = session.query(Geometry).all()
		geometry_dicts = [[g.x, g.y, g.w, g.h] for g in geometries]

		# get existing urls from the database
		db_urls = session.query(URL).all()
		urls = [d.url for d in db_urls]

		for line in f:
			try:
				# get data
				text = ast.literal_eval(line.rstrip())
				t = text['time']
				arrangement = text['geometry']

				# if this is a duplicate of the last arrangement, don't record it
				if arrangement == last_arr:
					continue

				# check for new windows opened or activated
				for app, value in arrangement.iteritems():
					app_name = value['name']
					active = value['active']
					windows = value['windows']

					# add new apps to the database, but should not need to to this
					if app_name not in app_names:
						# add app to the database
						app_to_add = App(t, app_name)
						session.add(app_to_add)
						activity_tracker.storage.sqlcommit()

						# update our local app list
						apps = session.query(App).all()
						app_names = [a.name for a in apps]

					# add the app's pid to the arrangement dictionary
					# NOTE(review): ids here and below are taken as list position + 1,
					# which presumes the query returns rows in id order with no gaps -- confirm.
					pid = app_names.index(app_name) + 1 # array starts at 0, database ids a 1
					value['pid'] = pid # removing for now, may want to add back later, but will need new arrangement comparison method

					for window, val in windows.iteritems():
						# get window information
						title = val['name']
						w_active = val['active']
						bounds = val['bounds']
						x = int(bounds['x'])
						y = int(bounds['y'])
						width =	 int(bounds['width'])
						height = int(bounds['height'])

						# add new windows to the database, but should not need to do this
						if title not in window_names:
							# add app to the database
							app_to_add = Window(t, pid, title)
							session.add(app_to_add)
							activity_tracker.storage.sqlcommit()

							# update our local window list
							# (rebinds 'windows'; the iteration above already holds its own iterator)
							windows = session.query(Window).all()
							window_names = [w.title for w in windows]

						# add the window's wid to the window dictionary
						wid = window_names.index(title) + 1 # array starts at 0, database ids a 1
						val['wid'] = wid # removing for now, may want to add back later, but will need new arrangement comparison method

						# add new geometries to the database
						gd = [x, y, width, height]
						if gd not in geometry_dicts:
							ge = Geometry(t, x, y, width, height)
							session.add(ge)
							activity_tracker.storage.sqlcommit()

							# update our local geomery list
							geometries = session.query(Geometry).all()
							geometry_dicts = [[g.x, g.y, g.w, g.h] for g in geometries]

						# add the geometry's gid to the window dictionary
						gid = geometry_dicts.index(gd) + 1 # array starts at 0, database ids a 1
						val['gid'] = gid # removing for now, may want to add back later, but will need new arrangement comparison method

						# add new urls to the database
						if 'tabs' in val.keys():
							tabs = val['tabs']
							for tab, tval in tabs.iteritems():
								t_active = tval['active']
								t_url = tval['url']
								if tval['url'] not in urls:
									u = URL(t, tval['title'], tval['url'], tval['host'])
									session.add(u)
									activity_tracker.storage.sqlcommit()

									# update our local urls list
									db_urls = session.query(URL).all()
									urls = [d.url for d in db_urls]

								# add the url's uid to the window dictionary
								uid = urls.index(tval['url']) + 1 # array starts at 0, database ids a 1
								tval['uid'] = uid

								#TODO create url open, close, active, inactive events
								if not app in last_arr:
									te = URLEvent(t, uid, pid, wid, "Open")
									session.add(te)
									# NOTE(review): every other branch emits "Active" for an active tab;
									# this one emits a second "Open" -- confirm whether that is intended.
									if t_active:
										te = URLEvent(t, uid, pid, wid, "Open")
										session.add(te)
								elif not window in last_arr[app]['windows']:
									te = URLEvent(t, uid, pid, wid, "Open")
									session.add(te)
									if t_active:
										te = URLEvent(t, uid, pid, wid, "Active")
										session.add(te)
								elif not tab in last_arr[app]['windows'][window]['tabs']:
									te = URLEvent(t, uid, pid, wid, "Open")
									session.add(te)
									if t_active:
										te = URLEvent(t, uid, pid, wid, "Active")
										session.add(te)
								elif t_active and (not last_arr[app]['windows'][window]['tabs'][tab]['active'] or t_url != last_arr[app]['windows'][window]['tabs'][tab]['url']):
									te = URLEvent(t, uid, pid, wid, "Active")
									session.add(te)

						# create open and active events if...
						# this app was not even open the last time around
						if not app in last_arr:
							we = WindowEvent(t, wid, "Open")
							session.add(we)
							if w_active:
								we = WindowEvent(t, wid, "Active")
								session.add(we)

						else:
							# or if the window was not present last time
							if not window in last_arr[app]['windows']:
								we = WindowEvent(t, wid, "Open")
								session.add(we)
								if w_active:
									we = WindowEvent(t, wid, "Active")
									session.add(we)
							else:
								# or the window was present but not active last time, or had a different name
								if w_active and (not last_arr[app]['windows'][window]['active'] or title != last_arr[app]['windows'][window]['name']):
									we = WindowEvent(t, wid, "Active")
									session.add(we)

				# add new arrangement to the database
				arr_to_add = Arrangement(t, str(arrangement))
				session.add(arr_to_add)

				# look now at the last arrangement to see what has closed or gone inactive
				for app, value in last_arr.iteritems():
					app_name = value['name']
					active = value['active']
					windows = value['windows']

					if app_name not in app_names:
						# add app to the database
						app_to_add = App(t, app_name)
						session.add(app_to_add)
						#TODO catch if our commit fails
						activity_tracker.storage.sqlcommit()

						# update our local app list
						apps = session.query(App).all()
						app_names = [a.name for a in apps]

					pid = app_names.index(app_name) + 1 # array starts at 0, database ids a 1

					for window, val in windows.iteritems():
						# get window information
						title = val['name']
						w_active = val['active']
						bounds = val['bounds']
						x = bounds['x']
						y = bounds['y']
						w =	 bounds['width']
						h = bounds['height']

						if title not in window_names:
							# add app to the database
							app_to_add = Window(t, pid, title)
							session.add(app_to_add)
							#TODO catch if our commit fails
							activity_tracker.storage.sqlcommit()

							# update our local app list
							windows = session.query(Window).all()
							window_names = [w.title for w in windows]

						wid = window_names.index(title) + 1 # array starts at 0, database ids a 1

						if 'tabs' in val.keys():
							tabs = val['tabs']
							for tab, tval in tabs.iteritems():
								# list.index raises ValueError for a url that is not in 'urls';
								# the except below then keeps the whole line for the next run
								uid = urls.index(tval['url']) + 1
								t_active = tval['active']
								t_url = tval['url']
								if not app in arrangement:
									te = URLEvent(t, uid, pid, wid, "Close")
									session.add(te)
									if t_active:
										te = URLEvent(t, uid, pid, wid, "Inactive")
										session.add(te)
								elif not window in arrangement[app]['windows']:
									te = URLEvent(t, uid, pid, wid, "Close")
									session.add(te)
									if t_active:
										te = URLEvent(t, uid, pid, wid, "Inactive")
										session.add(te)
								elif not tab in arrangement[app]['windows'][window]['tabs']:
									te = URLEvent(t, uid, pid, wid, "Close")
									session.add(te)
									if t_active:
										te = URLEvent(t, uid, pid, wid, "Inactive")
										session.add(te)
								elif t_active and (not arrangement[app]['windows'][window]['tabs'][tab]['active'] or t_url != arrangement[app]['windows'][window]['tabs'][tab]['url']):
									te = URLEvent(t, uid, pid, wid, "Inactive")
									session.add(te)

						# create close and inactive events if...
						# this app is not longer present
						if not app in arrangement:
							we = WindowEvent(t, wid, "Close")# create close event
							session.add(we)
							if w_active:
								we = WindowEvent(t, wid, "Inactive")# create inactive event
								session.add(we)

						else:
							# or if the window is not longer present
							if not window in arrangement[app]['windows']:
								we = WindowEvent(t, wid, "Close")# create close event
								session.add(we)
								if w_active:
									we = WindowEvent(t, wid, "Inactive")# create inactive event
									session.add(we)
							else:
								# or the window is present but no longer active, or has a different name
								if w_active and (not arrangement[app]['windows'][window]['active'] or title != arrangement[app]['windows'][window]['name']):
									we = WindowEvent(t, wid, "Inactive")# create inactive event
									session.add(we)

				# the arrangement just processed (ids included) becomes the baseline for the next line
				last_arr = copy.deepcopy(arrangement)

			# bare except: any error while processing a line keeps that line for a retry;
			# objects already added to the session for it are not removed here
			except:
				print "Could not save " + str(line) + " to the database. Saving for the next round of parsing."
				lines_to_save.append(line)

		# write lines that did not make it into the database to the start of the
		# file and delete the rest of the file
		f.seek(0)
		for line in lines_to_save:
			f.write(line)
		f.truncate()
		f.close()
Example #15
0
def url(request, url):
    """Serve the URL shortener: list or redirect on GET, shorten on POST/PUT.

    GET with an empty ``url`` returns the submission form followed by links
    for every stored ``URL``.  GET with a value looks it up as ``URLcorta``
    (first as given, then prefixed with ``http://localhost:1234/``) and
    returns a page that redirects to the long URL after 5 seconds, or an
    error message when nothing matches.

    POST/PUT take the text after the first ``=`` of the request body as the
    URL to shorten, prepend ``https://`` when it has no ``http(s)://``
    prefix, and either report the existing short URL or store a new one
    numbered with the current row count.  A body without a value gets a
    400 response.

    Any other method returns a generic error message.
    """
    # Function-scope import so the block brings its own dependency.
    from django.utils.html import escape

    def pagina_redireccion(destino):
        # Stored URLs come from users, so escape before embedding in HTML.
        return ('<html><head><meta http-equiv="Refresh" content="5;url='
                + escape(destino) + '"></head>'
                + "<body><h1> Espere, va a ser redirigido en 5 segundos... "
                + "</h1></body></html>")

    if request.method == 'GET':
        if url == '':
            partes = [
                '<form method="POST" action="">',
                'URL: <input type="text" name="url"><br>',
                '<input type="submit" value="Enviar"><br>',
                '</form>',
            ]
            for pagina in URL.objects.all():
                corta = escape(pagina.URLcorta)
                larga = escape(pagina.URLlarga)
                partes.append('<li><a href="/' + corta + '">' + larga + '</a>')
                partes.append('<li><a href="/' + corta + '">' + corta + '</a>')
            respuesta = ''.join(partes)
        else:
            respuesta = '<h1><font color ="red">Lo sentimos esta pagina no ha sido almacenada.</font></h1>'
            # Try the value as stored, then as a full local short URL.
            for candidata in (url, 'http://localhost:1234/' + url):
                try:
                    pagina = URL.objects.get(URLcorta=candidata)
                except URL.DoesNotExist:
                    continue
                respuesta = pagina_redireccion(pagina.URLlarga)
                break
        return HttpResponse(respuesta)

    elif request.method == 'POST' or request.method == 'PUT':
        # partition keeps everything after the first "=", so a value that
        # itself contains "=" is not truncated and a body without "=" no
        # longer raises IndexError.
        _, separador, valor = request.body.partition("=")
        if not separador or not valor:
            return HttpResponse('Ha ocurrido un error', status=400)

        urlparaacortar = urllib.unquote(valor).decode('utf8')
        # Require the full "scheme://" prefix; a bare "http" used to pass.
        if not urlparaacortar.startswith(('http://', 'https://')):
            urlparaacortar = 'https://' + urlparaacortar

        try:
            existente = URL.objects.get(URLlarga=urlparaacortar)
        except URL.DoesNotExist:
            # Short URLs are numbered with the row count before insertion.
            urlnuevacorta = 'http://localhost:1234/' + str(URL.objects.count())
            pagina = URL(URLcorta=urlnuevacorta, URLlarga=urlparaacortar)
            pagina.save()
            larga = escape(pagina.URLlarga)
            corta = escape(pagina.URLcorta)
            respuesta = "<h1>Se ha acortado la URL de forma correcta</br></h1>" \
                + '<a href="' + larga + '">' + larga + ' </a></br>' \
                + '<a href="' + corta + '">' + corta + ' </a></br>'
        else:
            larga = escape(urlparaacortar)
            corta = escape(existente.URLcorta)
            respuesta = '<h1>Esta URL ya ha sido acortada </h1></br>' \
                + '<html><body><a href="' + larga + '">' + larga + ' </a></br></body></html>' \
                + '<html><body><a href="' + corta + '">' + corta + ' </a></br></body></html>'

        return HttpResponse(respuesta)

    else:
        respuesta = 'Ha ocurrido un error'
        return HttpResponse(respuesta)