Exemple #1
0
def do_get_addresses(site):
    """Look up address data for a site and record it on a new SiteAttributes.

    ``site`` may be a ``SearchResult``, a ``SiteData``, a ``Site``, or any
    other value, in which case it is used as the ``id`` for a
    ``Site.objects.get`` lookup (whose errors propagate to the caller).

    The ``'addresses'`` and ``'whois'`` entries returned by ``addrutil`` are
    turned into ``Ip`` / ``Whois`` rows via ``get_or_create`` and attached to
    the freshly created ``SiteAttributes``.  Returns ``None``.
    """
    # Each check runs on the result of the previous one, so wrappers are
    # peeled off in sequence until a Site (or a raw id) remains.
    if isinstance(site, SearchResult):
        site = site.site
    if isinstance(site, SiteData):
        site = site.site
    if not isinstance(site, Site):
        site = Site.objects.get(id=site)

    res = addrutil(site.url)

    # Saved up front, presumably so the ip/whois relations can be attached.
    site_attr = SiteAttributes(site=site)
    site_attr.save()

    # Build real lists: on Python 3 ``map()`` returns an iterator that is
    # always truthy, which would defeat the emptiness checks below.
    ips = [
        Ip.objects.get_or_create(**ip_fields)[0]
        for ip_fields in res.get('addresses', ())
    ]
    if ips:
        site_attr.ip.add(*ips)
        site_attr.save()

    whois_records = [
        Whois.objects.get_or_create(**whois_fields)[0]
        for whois_fields in res.get('whois', ())
    ]
    if whois_records:
        site_attr.whois.add(*whois_records)
        site_attr.save()
Exemple #2
0
def do_get_addresses(site):
    """Create a SiteAttributes row for a site from its ``addrutil`` lookup.

    Accepts a ``SearchResult``, a ``SiteData``, a ``Site``, or a value used
    as the ``id`` in ``Site.objects.get`` (lookup errors propagate).

    Every entry under ``'addresses'`` / ``'whois'`` in the ``addrutil``
    result is passed as keyword arguments to ``Ip`` / ``Whois``
    ``get_or_create`` and the resulting objects are added to the new
    ``SiteAttributes``.  Returns ``None``.
    """
    # Unwrap in order; the SiteData check also applies to whatever a
    # SearchResult's ``.site`` turned out to be.
    if isinstance(site, SearchResult):
        site = site.site
    if isinstance(site, SiteData):
        site = site.site
    if not isinstance(site, Site):
        site = Site.objects.get(id=site)

    res = addrutil(site.url)

    site_attr = SiteAttributes(site=site)
    site_attr.save()

    # List comprehensions instead of ``map()``: a Python 3 map object is
    # truthy even when empty, so ``if ips:`` would no longer guard anything.
    ips = [Ip.objects.get_or_create(**fields)[0]
           for fields in res.get('addresses', ())]
    if ips:
        site_attr.ip.add(*ips)
        site_attr.save()

    whois_objs = [Whois.objects.get_or_create(**fields)[0]
                  for fields in res.get('whois', ())]
    if whois_objs:
        site_attr.whois.add(*whois_objs)
        site_attr.save()
Exemple #3
0
def update_site_details(site, attributes):
    """Refresh a site's DNS/WHOIS data, recording a new SiteAttributes on change.

    Args:
        site: object whose ``url`` is passed to ``addrutil``.
        attributes: the current attributes record (exposing ``ip`` and
            ``whois`` relations), or a falsy value when none exists yet.

    Returns:
        A newly created ``SiteAttributes`` when the looked-up IPs or WHOIS
        records differ from those on ``attributes``; otherwise ``attributes``
        as passed in.  The update is best-effort: on any ordinary exception
        the error is logged and the current value of ``attributes`` is
        returned.
    """
    import logging

    try:
        res = addrutil(site.url)  # add data from DNS & WHOIS

        ips = set(
            Ip.objects.get_or_create(**ip_fields)[0]
            for ip_fields in res.get('addresses', ())
        )
        whois = set(
            Whois.objects.get_or_create(**whois_fields)[0]
            for whois_fields in res.get('whois', ())
        )

        # ``attributes`` may be falsy (see the ``elif attributes`` branches
        # below).  Dereferencing it unconditionally used to raise inside the
        # try block, silently discarding the data that was just looked up.
        if attributes:
            changed = (ips != frozenset(attributes.ip.all())
                       or whois != frozenset(attributes.whois.all()))
        else:
            changed = bool(ips or whois)

        if changed:
            site_attr = SiteAttributes(site=site)
            site_attr.save()

            # An empty lookup result falls back to the previously known
            # values rather than wiping them.
            if whois:
                site_attr.whois.add(*whois)
            elif attributes:
                site_attr.whois.add(*attributes.whois.all())

            if ips:
                site_attr.ip.add(*ips)
            elif attributes:
                site_attr.ip.add(*attributes.ip.all())

            site_attr.save()

            attributes = site_attr

    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt /
        # SystemExit, and the failure leaves a trace in the log.
        logging.getLogger(__name__).exception(
            "update_site_details failed for %r", site)

    return attributes
Exemple #4
0
def update_site_details(site, attributes):
    """Re-run the DNS/WHOIS lookup for ``site`` and store the result if it changed.

    Args:
        site: object whose ``url`` is passed to ``addrutil``.
        attributes: the existing attributes record (with ``ip`` and ``whois``
            relations), or a falsy value if the site has none yet.

    Returns:
        The new ``SiteAttributes`` when the IP set or WHOIS set differs from
        what ``attributes`` holds; otherwise ``attributes`` unchanged.  Any
        ordinary exception is logged and the current ``attributes`` value is
        returned (best-effort update).
    """
    import logging

    try:
        res = addrutil(site.url)  # add data from DNS & WHOIS

        ips = set(
            Ip.objects.get_or_create(**fields)[0]
            for fields in res.get('addresses', ())
        )
        whois = set(
            Whois.objects.get_or_create(**fields)[0]
            for fields in res.get('whois', ())
        )

        # Guard against a falsy ``attributes``: the original accessed
        # ``attributes.ip`` unconditionally, so that case raised and was
        # swallowed, and the fresh lookup result was never stored.
        if attributes:
            changed = (ips != frozenset(attributes.ip.all())
                       or whois != frozenset(attributes.whois.all()))
        else:
            changed = bool(ips or whois)

        if changed:
            site_attr = SiteAttributes(site=site)
            site_attr.save()

            # When the lookup returned nothing for a field, carry over the
            # previously recorded values instead.
            if whois:
                site_attr.whois.add(*whois)
            elif attributes:
                site_attr.whois.add(*attributes.whois.all())

            if ips:
                site_attr.ip.add(*ips)
            elif attributes:
                site_attr.ip.add(*attributes.ip.all())

            site_attr.save()

            attributes = site_attr

    except Exception:
        # Narrowed from a bare ``except`` so interpreter-exit signals
        # propagate; the failure is logged instead of vanishing.
        logging.getLogger(__name__).exception(
            "update_site_details failed for %r", site)

    return attributes
Exemple #5
0
    def post(request, forms):
        """Run a search with the chosen engine and record its results.

        Reads ``q`` and ``engine`` from the ``'search'`` form's data, creates
        a ``Search`` row, and for each result taken from the engine creates a
        ``SearchResult``.  Sites not yet known (by path-less URL) are created
        along with a ``SiteAttributes`` row holding their IP/WHOIS data, and
        are categorised by the first user keyword found among the keywords
        scraped from the site's root page.

        Returns:
            ``{}`` when the form is invalid, the query is empty, or the
            engine's symbol is not in ``engines``; otherwise
            ``{'searchid': <id of the new Search>}``.
        """
        form = forms['search']
        if not form.is_valid():
            return {}
        data = form.data
        query = data.get('q')
        engine = data.get('engine')

        if not query:
            return {}

        engine = Engine.objects.get(id=engine)

        if engine.symbol not in engines:
            return {}

        user_keywords = Keyword.objects.filter(group__in=get_user(request).groups.all())

        search = Search.objects.create(engine=engine, q=query)
        search.save()

        # itake(20, ...) presumably limits processing to the first 20 results.
        for res in itake(20, engines[engine.symbol](q=query)):
            res.update(addrutil(res['url']))  # add data from DNS & WHOIS
            bare_url = url_depath(res['url'])  # remove path from url
            keywords = []
            category = None

            # Any lookup failure is treated as "site not known yet".
            try:
                site = Site.objects.get(url=bare_url)
            except Exception:
                site = None

            # once processed in this run, we continue
            if site and SearchResult.objects.filter(site=site, search=search).exists():
                continue

            fresh = False
            if not site:
                scraped_keywords = root_page_keywords(bare_url)
                for kw in user_keywords:
                    # Normalise to lowercase ASCII where possible; fall back
                    # to the raw keyword when it cannot be decoded.
                    try:
                        kwrd = unidecode(kw.keyword.decode('utf-8')).lower()
                    except Exception:
                        kwrd = kw.keyword
                    if kwrd in scraped_keywords:
                        # Only the first matching keyword is used.
                        category = kw.category
                        keywords.append(kw)
                        break

                site = Site(name=res.get('title'), url=bare_url, category=category, banned=False)
                site.save()

                site_attr = SiteAttributes(site=site)
                site_attr.save()

                # Lists rather than ``map()``: a Python 3 map object is
                # always truthy, so the emptiness checks would be no-ops.
                ips = [
                    Ip.objects.get_or_create(**ip_fields)[0]
                    for ip_fields in res.get('addresses', ())
                ]
                if ips:
                    site_attr.ip.add(*ips)
                    site_attr.save()
                whois_records = [
                    Whois.objects.get_or_create(**whois_fields)[0]
                    for whois_fields in res.get('whois', ())
                ]
                if whois_records:
                    site_attr.whois.add(*whois_records)

                fresh = True

            search_result = SearchResult(search=search, sequence=res.get('_seq'), site=site, fresh=fresh)
            search_result.save()
            if keywords:
                search_result.keyword.add(*keywords)
            search_result.save()
        return {'searchid': search.id}