def test_create_url(self):
    """Content added to a URL must show up in that URL's content list."""
    page = Page()
    page.save()

    target = URL(TEST_URL)
    target.add_content(page)

    pointer = page.content_ptr
    attached = target.get_content()
    self.assertTrue(
        pointer in attached,
        "URL does not point to correct content.",
    )
def acortar(request):
    """Serve the shortener home page (GET) or register a URL (POST).

    GET renders ``pagina_principal.html`` with every stored ``URL``.

    POST reads the ``url`` form field, prefixes ``http://`` when it does not
    already start with ``http://`` or ``https://``, reuses the ``URL`` row
    with that ``direccion_URL`` or creates one, and renders
    ``pagina_seleccion.html`` with the row's id.  A missing or empty ``url``
    field produces a 400 response rendered from ``pagina_error.html``.

    Any other method renders ``pagina_error.html`` with a generic message.
    """
    if request.method == "GET":
        plantilla_principal = get_template("pagina_principal.html")
        contexto = {'lista_urls': URL.objects.all()}
        return HttpResponse(plantilla_principal.render(contexto))

    if request.method == "POST":
        url = request.POST.get("url")
        # ``get`` returns None when the field is absent; treat that like an
        # empty submission instead of crashing on ``None.startswith``.
        if not url:
            plantilla_error = get_template("pagina_error.html")
            contexto = {'mensaje': "Está vacío"}
            return HttpResponseBadRequest(plantilla_error.render(contexto))

        if not url.startswith(("http://", "https://")):
            url = "http://" + url

        try:
            nueva_URL = URL.objects.get(direccion_URL=url)
        except URL.DoesNotExist:
            nueva_URL = URL(direccion_URL=url)
            nueva_URL.save()

        plantilla_seleccion = get_template("pagina_seleccion.html")
        contexto = {'id': nueva_URL.id}
        return HttpResponse(plantilla_seleccion.render(contexto))

    plantilla_error = get_template("pagina_error.html")
    contexto = {'mensaje': "Algo ha ido mal"}
    return HttpResponse(plantilla_error.render(contexto))
def test_lookup_by_url(self):
    """A URL fetched through the manager must still list the added content."""
    page = Page()
    page.save()

    original = URL(TEST_URL)
    original.add_content(page)

    found = URL.objects.get(url=TEST_URL)
    pointer = page.content_ptr
    attached = found.get_content()
    self.assertTrue(
        pointer in attached,
        "Unable to find correct content by URL.",
    )
def generate_hash():
    """Create a short link for the ``url`` given in the JSON request body.

    Returns a ``(body, status)`` tuple.  The body is a dict with ``status``
    and ``message`` keys, plus ``url`` (the request base URL with
    ``/generate`` removed, followed by the hash from ``URL.save_hash``) on
    success.

    Responds with 400 when the body is not a JSON object, when ``url`` is
    missing or not a string, or when it does not start with ``http://`` or
    ``https://``.
    """
    payload = request.json
    # A JSON body that is null or a list has no usable 'url' member; guard
    # before indexing so such requests get the 400 below rather than a crash.
    long_url = payload.get('url') if isinstance(payload, dict) else None
    if not isinstance(long_url, str):
        return {
            'status': 400,
            'message': 'url either not found in body or invalid'
        }, 400

    # Require the full scheme separator: a bare "http" is not a usable URL.
    if not long_url.startswith(('http://', 'https://')):
        return {
            'status': 400,
            'message': 'Invalid url in request body. URL must start with http or https'
        }, 400

    base_url = request.base_url.replace('/generate', '')
    url_hash = URL.save_hash(long_url)
    return {
        'status': 200,
        'message': 'URL generated successfully',
        'url': f'{base_url}/{url_hash}'
    }, 200
def insert_url(url: str, code: str) -> int:
    """Insert a URL row and a matching zeroed ``Stats`` row.

    Returns the value produced by executing the ``URL`` insert; the same
    value is used as the ``url`` reference of the new ``Stats`` row.
    """
    new_id = URL.insert(url=url, code=code).execute()
    stats_insert = Stats.insert(url=new_id, usage_count=0)
    stats_insert.execute()
    return new_id
def parse_urls(session):
    """Move logged URL events from the URL log file into ``session``.

    The log file is ``cfg.URLLOG`` inside ``cfg.CURRENT_DIR``.  Each line is
    parsed with ``ast.literal_eval`` and must provide the keys ``time``,
    ``browser``, ``title``, ``url`` and ``event``; a ``URL`` object built
    from them is added to ``session`` (nothing is committed here).

    Lines that cannot be parsed or added are reported on stdout and written
    back to the start of the file; everything else is truncated away.  When
    the log file does not exist the function does nothing.
    """
    # get name of file to read
    urlfile = os.path.join(os.path.expanduser(cfg.CURRENT_DIR), cfg.URLLOG)

    # TODO may need to check if file is already open using a file lock
    if not os.path.isfile(urlfile):
        return

    lines_to_save = []
    # ``with`` guarantees the handle is closed even if rewriting fails.
    with open(urlfile, 'r+') as f:
        for line in f:
            try:
                text = ast.literal_eval(line.rstrip())
                url = URL(unicode(text['time']),
                          unicode(text['browser']),
                          unicode(text['title']),
                          unicode(text['url']),
                          unicode(text['event']))
                session.add(url)
            except Exception:
                # ``Exception`` rather than a bare except, so Ctrl-C and
                # interpreter shutdown are not mistaken for a bad line.
                # Single-argument print() behaves the same on Python 2 and 3.
                print("Could not save " + str(line) + " to the database. Saving for the next round of parsing.")
                lines_to_save.append(line)

        # write lines that did not make it into the database to the start of
        # the file and delete the rest of the file
        f.seek(0)
        f.writelines(lines_to_save)
        f.truncate()
def add(request):
    """Show the add-link form (GET) or store a submitted user link (POST).

    On a valid POST a ``URL`` of type ``URL.PERFORMANCES`` is saved for the
    current user's profile, using the form's ``label`` as name and ``link``
    as target, and the client is redirected to ``/links/<profile name>/``.
    ``http://`` is prepended to the link only when it has neither an
    ``http://`` nor an ``https://`` prefix.

    An invalid POST re-renders ``add_user_link.html`` with the bound form;
    any other method renders it with an empty form.
    """
    if request.method == 'POST':
        form = AddUserLinkForm(request.POST)
        if form.is_valid():
            cd = form.cleaned_data
            link = cd['link']
            # Accept https links as they are; prefixing them would produce
            # "http://https://...".
            if not link.startswith(('http://', 'https://')):
                link = 'http://' + link

            profile = request.user.get_profile()
            url = URL()
            url.name = cd['label']
            url.other_profile = link
            url.user = profile
            url.type = URL.PERFORMANCES
            url.save()
            profile.save()
            return HttpResponseRedirect('/links/' + profile.name + '/')
    else:
        form = AddUserLinkForm()

    return render_to_response(
        'add_user_link.html',
        {
            'form': form,
            'all_tags': Tag.objects.all()
        },
        RequestContext(request))
def acorta(request):
    """Render the shortener form (GET) or shorten the posted URL (POST).

    POST reads the ``original`` form field and prefixes ``http://`` unless
    it already starts with ``http://`` or ``https://``.  If a ``URL`` with
    that ``original`` exists, ``realizada.html`` is rendered with the stored
    pair; otherwise a new row is saved whose ``cortada`` is the current row
    count plus one and ``acortada.html`` is rendered.

    Methods other than GET and POST receive an empty 405 response.
    """
    if request.method == 'GET':
        template = get_template('plantilla.html')
        return HttpResponse(template.render({}))

    if request.method != 'POST':
        # No response was ever built for other methods; answer explicitly
        # instead of failing on an unbound variable.
        return HttpResponse(status=405)

    url_original = request.POST['original']
    if not url_original.startswith(("http://", "https://")):
        url_original = "http://" + url_original

    try:
        page = URL.objects.get(original=url_original)
    except URL.DoesNotExist:
        # count() asks the database for the number of rows instead of
        # loading every row just to measure the list.
        # NOTE(review): count-based numbering can repeat if rows are ever
        # deleted -- confirm rows are append-only.
        corta = URL.objects.count() + 1
        url_new = URL(original=url_original, cortada=corta)
        url_new.save()
        template = get_template('acortada.html')
        context = {'original': url_original, 'cortada': corta}
    else:
        template = get_template('realizada.html')
        context = {'original': page.original, 'cortada': page.cortada}

    return HttpResponse(template.render(context))
def redirect_url(url_hash):
    """Redirect to the long URL stored for ``url_hash``.

    Returns ``(redirect response, 302)`` when ``URL.get_url`` yields a truthy
    value, otherwise a ``(dict, 400)`` error pair.
    """
    target = URL.get_url(url_hash)
    if target:
        return redirect(target), 302

    error_body = {
        'status': 400,
        'message': 'URL does not exist. Please try another one.'
    }
    return error_body, 400
def shorten_url(url: str):
    """Return the short redirect address for ``url``, creating it if needed.

    ``http://`` is prepended when ``url`` has no http/https prefix, and
    ``www.`` is prepended to the host when missing.  URLs are identified by
    host and path only, so the query string and fragment do not take part
    in the lookup or in the SHA-256 hash.

    Returns ``'localhost:5000/shorten/redirect/'`` followed by the stored
    hash of the matching (or newly committed) ``URL`` row.
    """
    redirect_prefix = 'localhost:5000/shorten/redirect/'

    if not url.startswith(('https://', 'http://')):
        url = 'http://' + url

    parsed_url = urlparse(url)
    domain = parsed_url.netloc
    if not domain.startswith('www.'):
        domain = 'www.' + domain
    path = parsed_url.path

    # Only one row is needed, so let the database stop at the first match
    # instead of materialising every matching row.
    existing = URL.query.filter(URL.domain == domain, URL.path == path).first()
    if existing is not None:
        return redirect_prefix + existing.hash

    # The hash is only required when a new row is stored.
    sha_signature = hashlib.sha256((domain + path).encode()).hexdigest()
    obj = URL(domain=domain, path=path, hash=sha_signature, original_url=url)
    db.session.add(obj)
    db.session.commit()
    return redirect_prefix + obj.hash
def post(self):
    """Scrape the submitted product page and store its ``Item`` and ``URL``.

    Request parameters read: ``content`` (page address to fetch),
    ``website`` (id of the parent ``Website`` key), ``category`` and
    ``for_who``.

    The text of the ``<span id="displaySkuCode">`` element is used as the
    datastore id of both entities.  Title, image link, sale price and
    regular price are copied from the page when the matching elements are
    present.  The handler always finishes by redirecting to ``/listings``;
    when the page exposes no SKU nothing is written.
    """
    # Local import so the block does not depend on the file's import list.
    import logging

    content_url = self.request.get('content')
    fetched = urlfetch.fetch(content_url)
    doc = lxml.html.fromstring(fetched.content)

    # When several spans match, the last one wins.
    item_id = None
    for sku in doc.cssselect('span'):
        if sku.get('id') == 'displaySkuCode':
            item_id = sku.text

    if not item_id:
        # Without a SKU there is no entity id; skip the write instead of
        # failing on an unbound/empty id.
        logging.warning("No displaySkuCode found at %s; nothing stored.",
                        content_url)
        self.redirect('/listings')
        return

    website_key = ndb.Key("Website", self.request.get('website'))

    # NOTE(review): get_by_id is called without parent= although new
    # entities are created under a Website parent key; presumably the lookup
    # therefore never finds a previously stored entity -- confirm intent.
    registry_item = Item.get_by_id(item_id)
    if registry_item is None:
        registry_item = Item(id=item_id, parent=website_key)

    for tag in doc.cssselect('title'):
        registry_item.title = tag.text.encode('utf-8')

    for img in doc.cssselect('link'):
        if img.get('rel') == 'image_src':
            registry_item.img = img.get('href')

    for price in doc.cssselect('div'):
        if price.get('class') == 'sale-price':
            for savings in price.cssselect('strong'):
                registry_item.sale = savings.text.encode('utf-8')
        if price.get('class') == 'reg-price':
            registry_item.price = price.text.encode('utf-8').replace("Reg.", "")

    registry_item.category = self.request.get('category')
    registry_item.for_who = self.request.get('for_who')
    registry_item.put()

    link = URL.get_by_id(item_id)
    if link is None:
        link = URL(id=item_id, parent=website_key)
    link.url = content_url
    link.last_scrape = datetime.datetime.now()
    link.put()

    self.redirect('/listings')
def post(self):
    """Scrape the submitted product page and store its ``Item`` and ``URL``.

    Request parameters read: ``content`` (page address to fetch),
    ``website`` (id of the parent ``Website`` key), ``category`` and
    ``for_who``.

    The text of the ``<span id="displaySkuCode">`` element is used as the
    datastore id of both entities.  Title, image link, sale price and
    regular price are copied from the page when the matching elements are
    present.  The handler always finishes by redirecting to ``/listings``;
    when the page exposes no SKU nothing is written.
    """
    # Local import so the block does not depend on the file's import list.
    import logging

    content_url = self.request.get('content')
    fetched = urlfetch.fetch(content_url)
    doc = lxml.html.fromstring(fetched.content)

    # When several spans match, the last one wins.
    item_id = None
    for sku in doc.cssselect('span'):
        if sku.get('id') == 'displaySkuCode':
            item_id = sku.text

    if not item_id:
        # Without a SKU there is no entity id; skip the write instead of
        # failing on an unbound/empty id.
        logging.warning("No displaySkuCode found at %s; nothing stored.",
                        content_url)
        self.redirect('/listings')
        return

    website_key = ndb.Key("Website", self.request.get('website'))

    # NOTE(review): get_by_id is called without parent= although new
    # entities are created under a Website parent key; presumably the lookup
    # therefore never finds a previously stored entity -- confirm intent.
    registry_item = Item.get_by_id(item_id)
    if registry_item is None:
        registry_item = Item(id=item_id, parent=website_key)

    for tag in doc.cssselect('title'):
        registry_item.title = tag.text.encode('utf-8')

    for img in doc.cssselect('link'):
        if img.get('rel') == 'image_src':
            registry_item.img = img.get('href')

    for price in doc.cssselect('div'):
        if price.get('class') == 'sale-price':
            for savings in price.cssselect('strong'):
                registry_item.sale = savings.text.encode('utf-8')
        if price.get('class') == 'reg-price':
            registry_item.price = price.text.encode('utf-8').replace("Reg.", "")

    registry_item.category = self.request.get('category')
    registry_item.for_who = self.request.get('for_who')
    registry_item.put()

    link = URL.get_by_id(item_id)
    if link is None:
        link = URL(id=item_id, parent=website_key)
    link.url = content_url
    link.last_scrape = datetime.datetime.now()
    link.put()

    self.redirect('/listings')
session.commit() # # if it's not a retweet # if not parentTweet: links = re.findall(r"http:\/\/t.co\/[a-zA-Z0-9\-\.]+", text) if links: # # let's remove the duplicates # links = list(set(links)) for link in links: # # if it's a real link # if is_ascii(link): fetchedURL = session.query(URL).filter( URL.shortAddress == link).first() if not fetchedURL: l = URL(link, u.ID) session.add(l) fetchedURL = l fetchedURL.tweets.append(u) session.commit() count += 1 if count % 1000 == 0: print "processed " + str(count) + " tweets"
# parse_geometries(session, activity_tracker)
#
# Purpose: replay the window-geometry log file (cfg.GEOLOG inside
# cfg.CURRENT_DIR) into the database through `session`.
#
# Outline of what the code below does:
#   * Loads the most recent Arrangement row (highest id), evaluates its `arr`
#     text with ast.literal_eval, and strips the 'pid', 'wid' and 'gid' keys
#     so it can be compared with freshly logged arrangements.
#   * For every log line: evaluates the line with ast.literal_eval, reads its
#     'time' and 'geometry' entries, and skips the line when the geometry
#     equals the previous arrangement.
#   * Adds App, Window, Geometry and URL rows for names/values not yet seen,
#     committing through activity_tracker.storage.sqlcommit() and re-querying
#     the table after each addition.
#   * Derives pid/wid/gid/uid as (position in the re-queried list) + 1 and
#     stores them back into the arrangement dictionary.
#   * Emits URLEvent / WindowEvent rows "Open" and "Active" by comparing the
#     new arrangement with the previous one, stores the new Arrangement, then
#     walks the previous arrangement to emit "Close" and "Inactive" events.
#   * Lines that raise are printed and written back to the start of the file;
#     the remainder of the file is truncated.
#
# Parameters:
#   session          -- database session used for queries and session.add().
#   activity_tracker -- only `activity_tracker.storage.sqlcommit()` is used.
#
# NOTE(review): the id derivation assumes database ids are contiguous from 1
#   and that .all() returns rows in id order -- confirm.
# NOTE(review): in the first URLEvent branch (`if not app in last_arr`) the
#   `if t_active:` event is created with "Open", while the sibling branches
#   use "Active"; this looks like a copy/paste slip -- confirm before changing.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit.
# NOTE(review): this is Python 2 code (print statement, dict.iteritems).
# NOTE(review): in this copy of the file the body is collapsed onto a few
#   physical lines, so everything after the first '#' on each line is read as
#   a comment; the original line structure must be restored before it can run.
def parse_geometries(session, activity_tracker): # get names of file to read and the last arrangement geofile = os.path.join(os.path.expanduser(cfg.CURRENT_DIR), cfg.GEOLOG) q = session.query(Arrangement).order_by(Arrangement.id.desc()).first() last_arr = ast.literal_eval(q.arr) if q else {} # clean up dictionary for equal comparison for a, va in last_arr.iteritems(): del va['pid'] for w, vw in va['windows'].iteritems(): del vw['wid'] del vw['gid'] # check if db file if os.path.isfile(geofile): f = open(geofile, 'r+') lines_to_save = [] # get existing apps from the database apps = session.query(App).all() app_names = [a.name for a in apps] # get existing windows from the database windows = session.query(Window).all() window_names = [w.title for w in windows] # get existing geometries from the database geometries = session.query(Geometry).all() geometry_dicts = [[g.x, g.y, g.w, g.h] for g in geometries] # get existing urls from the database db_urls = session.query(URL).all() urls = [d.url for d in db_urls] for line in f: try: # get data text = ast.literal_eval(line.rstrip()) t = text['time'] arrangement = text['geometry'] # if this is a duplicate of the last arrangement, don't record it if arrangement == last_arr: continue # check for new windows opened or activated for app, value in arrangement.iteritems(): app_name = value['name'] active = value['active'] windows = value['windows'] # add new apps to the database, but should not need to to this if app_name not in app_names: # add app to the database app_to_add = App(t, app_name) session.add(app_to_add) activity_tracker.storage.sqlcommit() # update our local app list apps = session.query(App).all() app_names = [a.name for a in apps] # add the app's pid to the arrangement dictionary pid = app_names.index(app_name) + 1 # array starts at 0, database ids a 1 value['pid'] = pid # removing for now, may want to add back later, but will need new arrangement comparison method for window, val in windows.iteritems(): # get 
window information title = val['name'] w_active = val['active'] bounds = val['bounds'] x = int(bounds['x']) y = int(bounds['y']) width = int(bounds['width']) height = int(bounds['height']) # add new windows to the database, but should not need to do this if title not in window_names: # add app to the database app_to_add = Window(t, pid, title) session.add(app_to_add) activity_tracker.storage.sqlcommit() # update our local window list windows = session.query(Window).all() window_names = [w.title for w in windows] # add the window's wid to the window dictionary wid = window_names.index(title) + 1 # array starts at 0, database ids a 1 val['wid'] = wid # removing for now, may want to add back later, but will need new arrangement comparison method # add new geometries to the database gd = [x, y, width, height] if gd not in geometry_dicts: ge = Geometry(t, x, y, width, height) session.add(ge) activity_tracker.storage.sqlcommit() # update our local geomery list geometries = session.query(Geometry).all() geometry_dicts = [[g.x, g.y, g.w, g.h] for g in geometries] # add the geometry's gid to the window dictionary gid = geometry_dicts.index(gd) + 1 # array starts at 0, database ids a 1 val['gid'] = gid # removing for now, may want to add back later, but will need new arrangement comparison method # add new urls to the database if 'tabs' in val.keys(): tabs = val['tabs'] for tab, tval in tabs.iteritems(): t_active = tval['active'] t_url = tval['url'] if tval['url'] not in urls: u = URL(t, tval['title'], tval['url'], tval['host']) session.add(u) activity_tracker.storage.sqlcommit() # update our local urls list db_urls = session.query(URL).all() urls = [d.url for d in db_urls] # add the url's uid to the window dictionary uid = urls.index(tval['url']) + 1 # array starts at 0, database ids a 1 tval['uid'] = uid #TODO create url open, close, active, inactive events if not app in last_arr: te = URLEvent(t, uid, pid, wid, "Open") session.add(te) if t_active: te = URLEvent(t, uid, 
pid, wid, "Open") session.add(te) elif not window in last_arr[app]['windows']: te = URLEvent(t, uid, pid, wid, "Open") session.add(te) if t_active: te = URLEvent(t, uid, pid, wid, "Active") session.add(te) elif not tab in last_arr[app]['windows'][window]['tabs']: te = URLEvent(t, uid, pid, wid, "Open") session.add(te) if t_active: te = URLEvent(t, uid, pid, wid, "Active") session.add(te) elif t_active and (not last_arr[app]['windows'][window]['tabs'][tab]['active'] or t_url != last_arr[app]['windows'][window]['tabs'][tab]['url']): te = URLEvent(t, uid, pid, wid, "Active") session.add(te) # create open and active events if... # this app was not even open the last time around if not app in last_arr: we = WindowEvent(t, wid, "Open") session.add(we) if w_active: we = WindowEvent(t, wid, "Active") session.add(we) else: # or if the window was not present last time if not window in last_arr[app]['windows']: we = WindowEvent(t, wid, "Open") session.add(we) if w_active: we = WindowEvent(t, wid, "Active") session.add(we) else: # or the window was present but not active last time, or had a different name if w_active and (not last_arr[app]['windows'][window]['active'] or title != last_arr[app]['windows'][window]['name']): we = WindowEvent(t, wid, "Active") session.add(we) # add new arrangement to the database arr_to_add = Arrangement(t, str(arrangement)) session.add(arr_to_add) # look now at the last arrangement to see what has closed or gone inactive for app, value in last_arr.iteritems(): app_name = value['name'] active = value['active'] windows = value['windows'] if app_name not in app_names: # add app to the database app_to_add = App(t, app_name) session.add(app_to_add) #TODO catch if our commit fails activity_tracker.storage.sqlcommit() # update our local app list apps = session.query(App).all() app_names = [a.name for a in apps] pid = app_names.index(app_name) + 1 # array starts at 0, database ids a 1 for window, val in windows.iteritems(): # get window information title 
= val['name'] w_active = val['active'] bounds = val['bounds'] x = bounds['x'] y = bounds['y'] w = bounds['width'] h = bounds['height'] if title not in window_names: # add app to the database app_to_add = Window(t, pid, title) session.add(app_to_add) #TODO catch if our commit fails activity_tracker.storage.sqlcommit() # update our local app list windows = session.query(Window).all() window_names = [w.title for w in windows] wid = window_names.index(title) + 1 # array starts at 0, database ids a 1 if 'tabs' in val.keys(): tabs = val['tabs'] for tab, tval in tabs.iteritems(): uid = urls.index(tval['url']) + 1 t_active = tval['active'] t_url = tval['url'] if not app in arrangement: te = URLEvent(t, uid, pid, wid, "Close") session.add(te) if t_active: te = URLEvent(t, uid, pid, wid, "Inactive") session.add(te) elif not window in arrangement[app]['windows']: te = URLEvent(t, uid, pid, wid, "Close") session.add(te) if t_active: te = URLEvent(t, uid, pid, wid, "Inactive") session.add(te) elif not tab in arrangement[app]['windows'][window]['tabs']: te = URLEvent(t, uid, pid, wid, "Close") session.add(te) if t_active: te = URLEvent(t, uid, pid, wid, "Inactive") session.add(te) elif t_active and (not arrangement[app]['windows'][window]['tabs'][tab]['active'] or t_url != arrangement[app]['windows'][window]['tabs'][tab]['url']): te = URLEvent(t, uid, pid, wid, "Inactive") session.add(te) # create close and inactive events if... 
# this app is not longer present if not app in arrangement: we = WindowEvent(t, wid, "Close")# create open event session.add(we) if w_active: we = WindowEvent(t, wid, "Inactive")# create open event session.add(we) else: # or if the window is not longer present if not window in arrangement[app]['windows']: we = WindowEvent(t, wid, "Close")# create open event session.add(we) if w_active: we = WindowEvent(t, wid, "Inactive")# create open event session.add(we) else: # or the window is present but no longer active, or has a different name if w_active and (not arrangement[app]['windows'][window]['active'] or title != arrangement[app]['windows'][window]['name']): we = WindowEvent(t, wid, "Inactive")# create open event session.add(we) last_arr = copy.deepcopy(arrangement) except: print "Could not save " + str(line) + " to the database. Saving for the next round of parsing." lines_to_save.append(line) # write lines that did not make it into the database to the start of the # file and delete the rest of the file f.seek(0) for line in lines_to_save: f.write(line) f.truncate() f.close()
def url(request, url):
    """Minimal URL-shortener view: list/redirect on GET, shorten on POST/PUT.

    GET with an empty ``url`` returns the submission form followed by the
    stored URLs.  GET with a non-empty ``url`` looks it up as ``URLcorta``
    (first verbatim, then prefixed with ``http://localhost:1234/``) and
    returns a page that meta-refreshes to ``URLlarga`` after 5 seconds, or
    an error message when nothing matches.

    POST and PUT take the value of the first ``name=value`` pair of the raw
    request body, percent-decode it, prefix ``https://`` when it has no
    http/https scheme, and return either the short URL already stored for it
    or a newly created one.  A body without a value gets a 400 response.

    Any other method returns a plain error message.

    Values coming from the request or the database are HTML-escaped before
    being placed in the markup.
    """
    # Local import so the block does not depend on the file's import list.
    from django.utils.html import escape

    prefijo_corta = 'http://localhost:1234/'

    def pagina_redireccion(destino):
        # Meta-refresh page shared by both lookup attempts.
        return ('<html><head><meta http-equiv="Refresh" content="5;url='
                + escape(destino) + '"></head>'
                + "<body><h1> Espere, va a ser redirigido en 5 segundos... "
                + "</h1></body></html>")

    if request.method == 'GET':
        if url == '':
            partes = [
                '<form method="POST" action="">',
                'URL: <input type="text" name="url"><br>',
                '<input type="submit" value="Enviar"><br>',
                '</form>',
            ]
            for pagina in URL.objects.all():
                corta = escape(pagina.URLcorta)
                larga = escape(pagina.URLlarga)
                partes.append('<li><a href="/' + corta + '">' + larga + '</a>')
                partes.append('<li><a href="/' + corta + '">' + corta + '</a>')
            return HttpResponse(''.join(partes))

        # Try the value as given, then as the path part of a short URL.
        for candidata in (url, prefijo_corta + url):
            try:
                pagina = URL.objects.get(URLcorta=candidata)
            except URL.DoesNotExist:
                continue
            return HttpResponse(pagina_redireccion(pagina.URLlarga))

        return HttpResponse('<h1><font color ="red">Lo sentimos esta pagina no ha sido almacenada.</font></h1>')

    if request.method == 'POST' or request.method == 'PUT':
        # Only the first form field is used.  Splitting on the first '='
        # keeps any further '=' that belongs to the submitted URL, and a
        # body without a value no longer raises IndexError.
        primer_campo = request.body.split("&", 1)[0]
        _, separador, valor = primer_campo.partition("=")
        if not separador or not valor:
            return HttpResponse('No se ha recibido ninguna URL', status=400)

        urlparaacortar = urllib.unquote(valor).decode('utf8')
        if not urlparaacortar.startswith(('http://', 'https://')):
            urlparaacortar = 'https://' + urlparaacortar
        larga = escape(urlparaacortar)

        try:
            urlcorta = URL.objects.get(URLlarga=urlparaacortar)
        except URL.DoesNotExist:
            # Numbering follows the current row count (first entry is 0).
            # NOTE(review): this repeats numbers if rows are ever deleted --
            # confirm rows are append-only.
            contador = URL.objects.count()
            urlnuevacorta = prefijo_corta + str(contador)
            pagina = URL(URLcorta=urlnuevacorta, URLlarga=urlparaacortar)
            pagina.save()
            corta = escape(urlnuevacorta)
            respuesta = ("<h1>Se ha acortado la URL de forma correcta</br></h1>"
                         + '<a href="' + larga + '">' + larga + ' </a></br>'
                         + '<a href="' + corta + '">' + corta + ' </a></br>')
        else:
            corta = escape(urlcorta.URLcorta)
            respuesta = ('<h1>Esta URL ya ha sido acortada </h1></br>'
                         + '<html><body><a href="' + larga + '">' + larga + ' </a></br></body></html>'
                         + '<html><body><a href="' + corta + '">' + corta + ' </a></br></body></html>')
        return HttpResponse(respuesta)

    return HttpResponse('Ha ocurrido un error')