def do_get_addresses(site):
    """Resolve DNS/WHOIS data for *site* and record it in a new SiteAttributes row.

    Accepts a SearchResult, SiteData, Site instance, or a Site primary key;
    anything that is not already a Site is normalized down to one.

    NOTE(review): the original file redefines this same function immediately
    afterwards; the later definition shadows this one at import time.
    """
    # Normalize the argument down to a Site instance.
    if isinstance(site, SearchResult):
        site = site.site
    if isinstance(site, SiteData):
        site = site.site
    if not isinstance(site, Site):
        site = Site.objects.get(id=site)
    res = addrutil(site.url)  # DNS & WHOIS lookup keyed by the site's URL
    site_attr = SiteAttributes(site=site)
    site_attr.save()  # row must exist before M2M .add() calls below
    # BUG FIX: under Python 3 ``map()`` yields a lazy iterator that is always
    # truthy, so the original ``if ips:`` guard could never skip the empty
    # case.  Materialize as a list so the emptiness test is meaningful.
    ips = [Ip.objects.get_or_create(**ip)[0] for ip in res.get('addresses', ())]
    if ips:
        site_attr.ip.add(*ips)  # M2M add persists immediately; no save() needed
    whois = [Whois.objects.get_or_create(**w)[0] for w in res.get('whois', ())]
    if whois:
        site_attr.whois.add(*whois)
def do_get_addresses(site):
    """Resolve DNS/WHOIS data for *site* and record it in a new SiteAttributes row.

    Accepts a SearchResult, SiteData, Site instance, or a Site primary key;
    anything that is not already a Site is normalized down to one.

    NOTE(review): this is a byte-for-byte duplicate (modulo whitespace) of the
    definition directly above it in the original file — one of the two should
    be deleted.
    """
    # Normalize the argument down to a Site instance.
    if isinstance(site, SearchResult):
        site = site.site
    if isinstance(site, SiteData):
        site = site.site
    if not isinstance(site, Site):
        site = Site.objects.get(id=site)
    res = addrutil(site.url)  # DNS & WHOIS lookup keyed by the site's URL
    site_attr = SiteAttributes(site=site)
    site_attr.save()  # row must exist before M2M .add() calls below
    # BUG FIX: under Python 3 ``map()`` yields a lazy iterator that is always
    # truthy, so the original ``if ips:`` guard could never skip the empty
    # case.  Materialize as a list so the emptiness test is meaningful.
    ips = [Ip.objects.get_or_create(**ip)[0] for ip in res.get('addresses', ())]
    if ips:
        site_attr.ip.add(*ips)  # M2M add persists immediately; no save() needed
    whois = [Whois.objects.get_or_create(**w)[0] for w in res.get('whois', ())]
    if whois:
        site_attr.whois.add(*whois)
def update_site_details(site, attributes):
    """Refresh the DNS/WHOIS attributes attached to *site*.

    Re-resolves the site's URL and compares the resulting Ip/Whois rows
    against the current *attributes* record.  When anything changed, a new
    SiteAttributes row is created — carrying forward the previous values
    wherever the fresh lookup came back empty — and returned.  On any
    lookup/DB failure the original *attributes* is returned unchanged
    (deliberate best-effort semantics).
    """
    try:
        res = addrutil(site.url)  # add data from DNS & WHOIS
        ips = {Ip.objects.get_or_create(**ip)[0] for ip in res.get('addresses', ())}
        whois = {Whois.objects.get_or_create(**w)[0] for w in res.get('whois', ())}
        # Only create a new attributes row when something actually changed.
        if ips != frozenset(attributes.ip.all()) or whois != frozenset(attributes.whois.all()):
            site_attr = SiteAttributes(site=site)
            site_attr.save()  # row must exist before M2M .add() calls
            if whois:
                site_attr.whois.add(*whois)
            elif attributes:
                # Fresh lookup came back empty: carry forward old whois rows.
                site_attr.whois.add(*attributes.whois.all())
            if ips:
                site_attr.ip.add(*ips)
            elif attributes:
                # Fresh lookup came back empty: carry forward old ip rows.
                site_attr.ip.add(*attributes.ip.all())
            site_attr.save()
            attributes = site_attr
    except Exception:
        # BUG FIX: was a bare ``except:`` which also swallowed SystemExit and
        # KeyboardInterrupt.  Narrowed to Exception while keeping the
        # intentional swallow-and-return-old-value behavior.
        pass
    return attributes
def post(request, forms):
    """Handle a search POST: run the engine query and persist sites/results.

    Validates the 'search' form, dispatches the query to the selected engine,
    and for each of the first 20 hits creates (or reuses) a Site plus a
    SearchResult row, tagging newly-seen sites with the first matching user
    keyword.  Returns ``{'searchid': ...}`` on success, or ``{}`` when the
    form, query, or engine is invalid/unknown.
    """
    form = forms['search']
    if not form.is_valid():
        return {}
    data = form.data
    query = data.get('q')
    engine = data.get('engine')
    if not query:
        return {}
    engine = Engine.objects.get(id=engine)
    if engine.symbol not in engines:
        return {}
    user_keywords = Keyword.objects.filter(group__in=get_user(request).groups.all())
    # objects.create() already saves; the original's extra save() was redundant.
    search = Search.objects.create(engine=engine, q=query)
    for res in itake(20, engines[engine.symbol](q=query)):
        res.update(addrutil(res['url']))  # add data from DNS & WHOIS
        bare_url = url_depath(res['url'])  # remove path from url
        keywords = []
        category = None
        site = None
        try:
            site = Site.objects.get(url=bare_url)
        except Exception:
            pass  # unknown site: created below
        if site:
            # Once processed in this run, we continue.
            # BUG FIX: use .exists() instead of truth-testing the queryset —
            # same semantics, but avoids fetching every matching row.
            if SearchResult.objects.filter(site=site, search=search).exists():
                continue
        fresh = False
        if not site:
            scraped_keywords = root_page_keywords(bare_url)
            for kw in user_keywords:
                try:
                    kwrd = unidecode(kw.keyword.decode('utf-8')).lower()
                except Exception:
                    # BUG FIX: was a bare ``except:``; keep the fallback to the
                    # raw keyword when it cannot be decoded as UTF-8.
                    kwrd = kw.keyword
                if kwrd in scraped_keywords:
                    # First matching keyword wins; it decides the category.
                    category = kw.category
                    keywords.append(kw)
                    break
            site = Site(name=res.get('title'), url=bare_url, category=category, banned=False)
            site.save()
            site_attr = SiteAttributes(site=site)
            site_attr.save()  # row must exist before M2M .add() calls
            # BUG FIX: materialize as lists — a Python 3 ``map`` object is
            # always truthy, so the original emptiness guards were dead.
            ips = [Ip.objects.get_or_create(**ip)[0] for ip in res.get('addresses', ())]
            if ips:
                site_attr.ip.add(*ips)  # M2M add persists immediately
            whois = [Whois.objects.get_or_create(**w)[0] for w in res.get('whois', ())]
            if whois:
                site_attr.whois.add(*whois)
            fresh = True
        search_result = SearchResult(search=search, sequence=res.get('_seq'), site=site, fresh=fresh)
        search_result.save()
        if keywords:
            search_result.keyword.add(*keywords)
    return {'searchid': search.id}