Example #1
def __init__(self, *args, **kwargs):
    super(SiteDataForm, self).__init__(*args, **kwargs)
    if self._request:
        group = get_group(self._request)
        qs = self.fields['category'].queryset
        self.fields['category'].queryset = qs.filter(
            group__in=(group,)).distinct()
Example #2
def __init__(self, *args, **kwargs):
    super(SearchForm, self).__init__(*args, **kwargs)
    if self._request:
        group = get_group(self._request)
        qs = self.fields['engine'].queryset
        self.fields['engine'].queryset = qs.filter(
            group__in=(group,)).distinct()
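Note that `self._request` is not set by Django's stock ModelForm; these forms evidently accept a `request` keyword (Example #15 below instantiates `SearchForm(instance=search, request=request)`). A minimal sketch of the base-class plumbing this implies, as an assumption about the project's actual code:

    from django import forms

    class RequestModelForm(forms.ModelForm):
        # Hypothetical base class: pop the request before ModelForm sees the
        # kwargs, so subclasses can filter their querysets per group.
        def __init__(self, *args, **kwargs):
            self._request = kwargs.pop('request', None)
            super(RequestModelForm, self).__init__(*args, **kwargs)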
Example #3
def scrap_info_edit_do(request, *args, **kwargs):
    #page = request.GET.get('page','1')
    group = get_group(request)
    siteid = request.POST.get('siteid')
    productid = request.POST.get('productid')
    scraperid = request.POST.get('scraperid')
    sitedataid = request.POST.get('sitedataid')
    descriptors = KeyValueDescriptor.objects.filter(active=True, group__in=(group,))
    product = Product.objects.filter(id=productid, group__in=(group,))
    site = SiteData.objects.filter(id=siteid, group__in=(group,))
    sitedata = SiteData.objects.filter(id=sitedataid, group__in=(group,))
    scraper = ScraperDescriptor.objects.filter(id=scraperid)
    if sitedata.exists():
        descriptors = descriptors.filter(target__symbol='SiteInfo')
    else:
        descriptors = descriptors.filter(target__symbol='ProductInfo')

    kvs = []
    for d in descriptors:
        if d.symbol in request.POST:
            if site.exists() and d.symbol == 'URL_PROD':
                continue
            if sitedata.exists() and d.symbol == 'URL_SHOP':
                continue
            val = request.POST.get(d.symbol).strip()
            if val:
                kvs.append(KeyValue.objects.create(descriptor=d, value=val))

    if kvs:
        if sitedata.exists():
            si = SiteInfo.objects.create(site=sitedata[0], automatic=False)
            si.values.add(*kvs)
            si.save()
        elif product.exists():
            pi = ProductInfo.objects.create(product=product[0], automatic=False)
            pi.values.add(*kvs)
            pi.save()
        else:
            url = request.POST['URL_PROD']
            name = request.POST['NAME_PROD']
            if not scraper.exists():
                if site.exists():
                    scraper = ScraperDescriptor.objects.create(site=site[0])
                else:
                    return
            else:
                scraper = scraper[0]
            scraper.url = url
            scraper.name = name
            scraper.items.clear()
            scraper.items.add(*kvs)
            scraper.save()

            page = full_html_page(url)
            obj = json.dumps(page_to_dict(page))
            # deactivate any previous templates and persist the change
            for st in ScraperTemplate.objects.filter(descriptor=scraper):
                st.active = False
                st.save()
            ScraperTemplate.objects.create(descriptor=scraper, value=obj)
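One caveat in scrap_info_edit_do: the KeyValue rows are created in the loop before the view knows whether anything will own them, so the bare `return` on the no-site path leaves orphaned rows. A sketch of one way to contain that, assuming stock Django transaction management:

    from django.db import transaction

    @transaction.atomic
    def scrap_info_edit_do_atomic(request, *args, **kwargs):
        # ... same body as above; on the branch that currently just
        # `return`s, first mark the transaction for rollback so the
        # KeyValue rows created earlier are discarded:
        transaction.set_rollback(True)
        return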
Example #4
def get_category(request, group=None):
    if not group:
        group = get_group(request)
    default_category = None
    try:
        default_category = SiteCategory.objects.get(default=True, group__in=(group,))
        default_category = default_category.symbol
    except (SiteCategory.DoesNotExist, SiteCategory.MultipleObjectsReturned):
        pass
    return request.GET.get('category', default_category)
Example #5
def scrap_site_info(request, siteid, edit, *args, **kwargs):
    edit = edit == 'edit'
    group = get_group(request)
    descriptors = KeyValueDescriptor.objects.filter(
        active=True, group__in=(group,), target__symbol='SiteInfo').order_by('order')
    site = SiteData.objects.get(id=siteid)
    results = get_info(SiteInfo.objects.filter(site__id=siteid), descriptors, edit=edit)
    return {
        'sitedata': site,
        'results': results,
        'edit': edit,
        }
Example #6
def scrap(request, *args, **kwargs):
    page = request.GET.get('page', '1')
    banned = 'banned' in request.GET
    group = get_group(request)
    results = Product.objects.filter(group=group, banned=banned)
    results, pages, _ = paginate(results, page, 15)
    return {
        'results': results,
        'pages': pages,
        'banned': banned,
    }
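paginate(results, page, 15) is a project helper that evidently returns a (results, pages, ...) triple. A compatible sketch built on Django's Paginator, offered as an assumption (the project's real helper may differ):

    from django.core.paginator import EmptyPage, PageNotAnInteger, Paginator

    def paginate(objects, page, per_page):
        # hypothetical stand-in for the project's paginate() helper
        paginator = Paginator(objects, per_page)
        try:
            results = paginator.page(page)
        except PageNotAnInteger:
            results = paginator.page(1)
        except EmptyPage:
            results = paginator.page(paginator.num_pages)
        return results, paginator.page_range, paginator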
Example #7
def scrap_product_info(request, productid, edit, *args, **kwargs):
    edit = edit == 'edit'
    group = get_group(request)
    descriptors = KeyValueDescriptor.objects.filter(
        active=True, group__in=(group,), target__symbol='ProductInfo').order_by('order')
    product = Product.objects.get(id=productid)
    results = get_info(ProductInfo.objects.filter(product__id=productid), descriptors, edit=edit)
    return {
        'product': product,
        'results': results,
        'edit': edit,
        }
Example #8
def search_add(request, forms):
    group = get_group(request)
    form = forms['add']
    if not form.is_valid():
        return HttpResponseServerError()
    form.save()
    search = form.instance
    search.manual = True
    search.group = group
    search.save()
    messages.success(request, 'Data successfully saved.')

    return {
        'searchid': search.id
    }
Example #9
def search_results_all(request, forms):
    group = get_group(request)
    category = get_category(request, group)
    format = request.GET.get('format')
    page = request.GET.get('page', '1')
    q = request.GET.get('find', '')

    categories = SiteCategory.objects.filter(active=True, group__in=(group,)).order_by("id")
    results = SearchResult.objects.filter(search__group=group).distinct('site__id', 'site__fresh', 'site__site__name')

    if q:
        results = results.filter(site__site__url__icontains=q)

    categories = list((cat, results.filter(site__category=cat, site__banned=False).count()) for cat in categories)
    result_counts = dict(
        banned=0,  # speed up # results.filter(site__banned=True).count(),
        other=results.filter(site__category__isnull=True, site__banned=False).count(),
    )

    if category:
        if category == 'banned':
            results = results.filter(site__banned=True)
        elif category == 'other':
            results = results.filter(site__category__isnull=True, site__banned=False)
        else:
            results = results.filter(site__category__symbol=category, site__banned=False)

    results = results.order_by('-site__fresh', 'site__site__name')

    if format == 'csv':
        return get_csv(results, category)
    else:
        results, pages, _ = paginate(results, page, 15)

        return {
            'forms': forms,
            'results': results,
            'pages': pages,
            'categories': categories,
            'category': category,
            'result_counts': result_counts,
            'q': q,
            }
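get_csv(results, category) is likewise a project helper. Judging by the fields the queries above touch (seq, site__site__url, site__fresh), a compatible sketch could look like this; the column choice is an assumption:

    import csv

    from django.http import HttpResponse

    def get_csv(results, category):
        # hypothetical stand-in for the project's get_csv() helper
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = (
            'attachment; filename="results_%s.csv"' % (category or 'all'))
        writer = csv.writer(response)
        writer.writerow(['seq', 'url', 'fresh'])
        for r in results:
            writer.writerow([r.seq, r.site.site.url, r.site.fresh])
        return response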
Example #10
def scrap_site_edit(request, siteid, scraperid=None, *args, **kwargs):
    group = get_group(request)
    site = SiteData.objects.get(id=siteid)
    descriptors = KeyValueDescriptor.objects.filter(
        active=True, visible=True, group__in=(group,),
        target__symbol='ProductInfo').order_by('order')
    items = {}
    scraper = None
    if scraperid:
        qs = ScraperDescriptor.objects.filter(id=scraperid)
        if qs.exists():
            scraper = qs[0]
            # only dereference items once we know the scraper exists
            items = dict((i.descriptor.symbol, i) for i in scraper.items.all())
    results = [(items.get(d.symbol), d) for d in descriptors if d.symbol != 'URL_PROD']
    return {
        'site': site,
        'scraper': scraper,
        'results': results,
        'edit': True,
        }
Example #11
def scrap_export(request, typ, attr, *args, **kwargs):
    group = get_group(request)

    site = request.GET.get('site', None)
    product = request.GET.get('product', None)

    category = ('approved',)
    qs = []
    descriptors = KeyValueDescriptor.objects.filter(active=True, group__in=(group,))

    if typ == 'product':
        qs = ProductInfo.objects.filter(product__page__site__category__symbol__in=category)
        if site:
            qs = qs.filter(product__page__site__id=site)
        descriptors = descriptors.filter(target__symbol='ProductInfo')
        fn = 'product_info'
    elif typ == 'site':
        qs = SiteInfo.objects.filter(site__category__symbol__in=category)
        if site:
            qs = qs.filter(site__id=site)
        descriptors = descriptors.filter(target__symbol='SiteInfo')
        fn = 'site_info'
    elif typ == 'history':
        descriptors = descriptors.filter(symbol=attr)
        if not descriptors.exists():
            return
        if all(d.target.symbol == 'SiteInfo' for d in descriptors):
            qs = SiteInfo.objects.filter(site__category__symbol__in=category, values__descriptor__in=descriptors)
            if site:
                qs = qs.filter(site__id=site)
        elif all(d.target.symbol == 'ProductInfo' for d in descriptors):
            qs = ProductInfo.objects.filter(product__page__site__category__symbol__in=category, values__descriptor__in=descriptors)
            if product:
                qs = qs.filter(product__id=product)
        fn = '%s_history' % descriptors[0].symbol
    else:
        return

    descriptors = descriptors.order_by('order')
    return get_info_csv(qs, descriptors, fn)
Example #12
def scrap_site(request, siteid, *args, **kwargs):
    page = request.GET.get('page', '1')
    banned = 'banned' in request.GET
    group = get_group(request)
    sd = SiteData.objects.filter(id=siteid, group=group)
    if sd.exists():
        sd = sd[0]
        results = Product.objects.filter(page__site=sd, banned=banned)
    else:
        results = []
        sd = None
    scrapers = ScraperDescriptor.objects.filter(site=sd)
    results, pages, _ = paginate(results, page, 15)
    descriptors = KeyValueDescriptor.objects.filter(active=True, group__in=(group,))
    return {
        'site': sd,
        'results': results,
        'pages': pages,
        'scrapers': scrapers,
        'descriptors': descriptors,
        'screenshots': PrintScreen.objects.filter(site=sd).order_by('-date'),
        'banned': banned,
        }
Example #13
def scrap_plugin(request, *args, **kwargs):
    group = get_group(request)
    descriptors = KeyValueDescriptor.objects.filter(
        active=True, group__in=(group,), target__symbol='ProductInfo').order_by('order')
    return {
        'descriptors': descriptors,
    }
Example #14
def site_add(request, forms, searchid):
    group = get_group(request)
    search = Search.objects.get(id=int(searchid))
    nexturl = request.GET.get('nexturl')
    site_form = forms['site']
    search_result_form = forms['search_result']

    try:
        # TODO move it to Model constructor
        site = Site.objects.get(url=url_depath(site_form.data.get('url')))
        site_form = SiteForm(request.POST, instance=site)
    except Site.DoesNotExist:
        pass

    sequence = search_result_form.data.get('sequence')
    if not site_form.is_valid() or not search_result_form.is_valid():
        return

    site = site_form.save()

    try:
        site_data = SiteData.objects.get(site=site, group=group)
    except SiteData.DoesNotExist:
        site_data = SiteData.objects.create(site=site, group=group,found=search)

    category = get_category(request, group)
    if category:
        if category == 'banned':
            site_data.banned = True
        else:
            site_data.banned = False
            if category == 'other':
                site_data.category = None
            else:
                site_data.category = SiteCategory.objects.get(symbol=category)

    category_changed(site_data)

    site_data.manual = True
    site_data.save()

    search_result = SearchResult.objects.filter(site=site_data, search=search)
    if search_result:
        search_result = search_result[0]
    else:
        search_result = SearchResult(site=site_data, search=search)

    str2list = combinator(
        lambda s: list(s) if isinstance(s, tuple) else [s],
        # parse the stored "1,3,5" string safely rather than eval()ing it
        lambda s: tuple(int(x) for x in str(s).split(',')) if s else tuple()
    )
    list2str = lambda s: ','.join(map(str, s))

    sequence = str2list(sequence)
    if search_result:
        sequence += str2list(search_result.sequence)
    sequence = sorted(set(sequence))

    search_result.sequence = list2str(sequence)
    search_result.seq = sequence[0] if sequence else 0
    search_result.save()
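Worked through with the helpers above (assuming combinator composes right-to-left, as the order of the lambdas suggests), merging a hit at position 3 into a stored sequence of '1,5' goes:

    sequence = str2list('3')          # [3]
    sequence += str2list('1,5')       # [3, 1, 5]
    sequence = sorted(set(sequence))  # [1, 3, 5]
    list2str(sequence)                # '1,3,5'  -> search_result.sequence
    sequence[0]                       # 1        -> search_result.seq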
Example #15
def search_results(request, searchid, forms):
    group = get_group(request)
    category = get_category(request, group)
    format = request.GET.get('format')
    page = request.GET.get('page', '1')
    q = request.GET.get('find', '')

    try:
        search = Search.objects.get(id=int(searchid), group=group)
    except (ValueError, Search.DoesNotExist):
        return HttpResponseNotFound()

    if is_categorizing(search):
        messages.warning(request, 'Categorization in progress. Try refreshing to check status.')
    else:
        s = is_success(search)
        if s is not None:
            if s:
                messages.success(request, 'Categorization ended successfully.')
            else:
                messages.error(request, 'Categorization failed.')

    forms['search'] = forms['search2'] = SearchForm(instance=search, request=request)

    categories = SiteCategory.objects.filter(active=True, group__in=(group,)).order_by("id")
    if search:
        results = SearchResult.objects.filter(search=search)

        if q:
            results = results.filter(site__site__url__icontains=q)

        categories = list((cat, results.filter(site__category=cat, site__banned=False).count()) for cat in categories)
        result_counts = dict(
            banned=0,  # speed up # results.filter(site__banned=True).count(),
            other=results.filter(site__category__isnull=True, site__banned=False).count(),
        )

        if category:
            if category == 'banned':
                results = results.filter(site__banned=True)
            elif category == 'other':
                results = results.filter(site__category__isnull=True, site__banned=False)
            else:
                results = results.filter(site__category__symbol=category, site__banned=False)

        results = results.order_by('-site__fresh', 'site__site__name', 'seq')
    else:
        categories = list((cat, 0) for cat in categories)
        result_counts = dict(banned=0, other=0)
        results = []

    if format == 'csv':
        return get_csv(results, category)
    else:
        results, pages, _ = paginate(results, page, 15)
        return {
            'forms': forms,
            'results': results,
            'pages': pages,
            'categories': categories,
            'category': category,
            'result_counts': result_counts,
            'q': q,
            'search': search,
            }
Example #16
File: util.py Project: I-TREND/SASF
def do_search(query, engine, user=None, count=20, categorize=True):
    if not isinstance(user, User) and user is not None:
        user = User.objects.get(username=user)
    if not isinstance(engine, Engine):
        engine = Engine.objects.get(symbol=engine)
    if not isinstance(query, Query):
        q = query
        query = None
    else:
        q = query.q

    if engine.symbol not in engines:
        return

    logger.debug('Invoking query "%s" using engine "%s".' % (q, engine.name))

    results = engines[engine.symbol](q=q)

    group = None
    if query:
        search = Search.objects.create(engine=engine, query=query, q=q)
        group = query.group
    else:
        search = Search.objects.create(engine=engine, q=q)
        if user:
            group = get_group(user)
    search.group = group
    search.manual = user is not None
    search.user = user
    search.save()

    logger.debug('Search id=%d.' % (search.id, ))

    for res in itake(count, results):

        seq = res.get('_seq') + 1
        bare_url = url_depath(res['url'])  # remove path from url
        site_data = site = None
        try:
            site = Site.objects.get(url=bare_url)
            site_data = SiteData.objects.get(site=site, group=group)
        except Exception:
            pass

        if site_data:
            logger.debug('%02d. Old result [id=%d] "%s".' % (
                seq,
                search.id,
                res['url'],
            ))
            # if this result's site was already recorded in this run,
            # append the new position to its sequence and move on
            try:
                sr = SearchResult.objects.get(site=site_data, search=search)
                sr.sequence += ', %s' % seq
                sr.save()
                continue
            except SearchResult.DoesNotExist:
                pass

        fresh = False
        if not site_data:
            logger.debug('%02d. New result [id=%d] "%s".' % (
                seq,
                search.id,
                res['url'],
            ))

            if not site:
                site = Site(name=res.get('title') or '(no title found)',
                            url=bare_url)
                site.save()

            fresh = True
            site_data = SiteData(site=site,
                                 group=group,
                                 banned=False,
                                 fresh=fresh,
                                 found=search)
            site_data.save()

        search_result = SearchResult.objects.create(search=search,
                                                    sequence=seq,
                                                    seq=seq,
                                                    site=site_data)

    if categorize:
        ThreadList.thread(search.id, partial(do_categorize, search))

    return search
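A hypothetical invocation (the engine symbol 'google' and username 'analyst1' are illustrative, not taken from the project):

    # run a manual query for one user and categorize results in the background
    search = do_search('herbal incense shop', engine='google',
                       user='analyst1', count=20, categorize=True)
    print('Search id=%d, manual=%s' % (search.id, search.manual))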