Esempio n. 1
0
    def handle(self, *args, **options):
        """Run the collector(s) selected on the command line.

        The optional first positional argument is a collector name and
        ``options['package']`` is a package selector; both are handed to
        ``collectors.find_collector``.  When nothing matches, or when several
        collectors match and ``package`` equals ``False``, the candidate class
        names are printed and nothing is run.  Otherwise ``fetch()`` is called
        on every matched collector; a failing collector has its traceback
        printed to stdout and the remaining collectors still run.
        """
        name = args[0] if args else ''
        package = options['package']
        all_collectors = collectors.find_collector(name, package=package)

        def print_choices(candidates):
            # Print the class name of every candidate so the user can pick one.
            print('Please choose only one collector from below to run.')
            print('')
            for candidate in candidates:
                print(candidate.__class__.__name__)

        # The equality test is kept on purpose: only a value equal to False
        # (False or 0) triggers the "ambiguous name" listing.
        # NOTE(review): a None/'' package skips this check -- confirm intent.
        if len(all_collectors) > 1 and package == False:
            print_choices(all_collectors)
            return

        if len(all_collectors) == 0:
            print('no match collector')
            print_choices(collectors.find_collector())
            return

        for collector in all_collectors:
            try:
                collector.fetch()
            except Exception:
                # Catch Exception rather than everything, so Ctrl-C and
                # SystemExit still stop the command instead of being printed
                # and skipped.
                traceback.print_exc(file=sys.stdout)
Esempio n. 2
0
    def handle(self, *args, **options):
        """Announce every shopping brand, then fetch shopping and address data.

        For each collector in the 'shopping' package a ``brand_found`` signal
        is sent with values derived from the collector's class name and
        ``display_name``.  Afterwards ``fetch()`` is called on every shopping
        collector and then on every address collector.  A collector that
        raises is logged through its own ``logger`` and its traceback is
        printed to stdout; the remaining collectors still run.
        """
        print('LoveShopping collector run!')

        def fetch_all(batch):
            # Fetch each collector in turn; one failure must not stop the rest.
            for collector in batch:
                try:
                    collector.fetch()
                except Exception:
                    # Exception (not a bare except) so that Ctrl-C and
                    # SystemExit abort the run instead of being logged.
                    collector.logger.error(traceback.format_exc())
                    traceback.print_exc(file=sys.stdout)

        shopping_collectors = collectors.find_collector(package='shopping')
        for collector in shopping_collectors:
            name = collector.__class__.__name__
            signals.brand_found.send(
                self,
                name=name,
                url='http://' + name,
                display_name=collector.display_name,
                priority=1,
                logo='static/images/%s_logo.png' % name,
            )

        fetch_all(shopping_collectors)
        fetch_all(collectors.find_collector(package='address'))

        # Printed unconditionally, even when some collectors failed above.
        print('Succeed!')
Esempio n. 3
0
    def handle(self, *args, **options):
        """Call ``init()`` on the single collector matching the given name.

        The optional first positional argument is the collector name (empty
        string when omitted).  When the name matches no collector or more
        than one, the candidate class names are printed instead and nothing
        is initialised.
        """
        name = args[0] if len(args) > 0 else ''
        matches = collectors.find_collector(name)
        match_count = len(matches)

        if match_count == 1:
            matches[0].init()
            return

        if match_count == 0:
            # Nothing matched: fall back to listing every known collector.
            print('no match collector')
            matches = collectors.find_collector()

        print('Please choose only one collector from below to run.')
        print('')
        for candidate in matches:
            print(candidate.__class__.__name__)
Esempio n. 4
0
def view_someday(request, collector_name, time_filter=1):
    """Show the objects one collector produced during the last N days.

    ``collector_name`` is looked up with ``collectors.find_collector``.  When
    it matches no collector or more than one, a page listing the candidate
    collectors as links is returned.  Otherwise the ``Object`` rows whose
    ``branch`` is the collector's class name and whose ``time`` is newer than
    ``time_filter`` days ago are rendered as an HTML table.

    ``time_filter`` is converted with ``int()``; a non-numeric value raises
    ``ValueError``.  Returns an ``HttpResponse`` in every case.
    """
    # Function-scope import: everything interpolated into the markup below
    # (URL-supplied name, stored titles and URLs) must be HTML-escaped.
    from django.utils.html import escape

    def selection_page(heading, candidates):
        # Heading followed by one link per candidate collector.
        # NOTE(review): `rn` is a module-level name not visible here,
        # presumably a line break -- confirm.
        parts = [
            '<h1>' + heading + '<br>',
            'Please select one from below:' + rn + '</h1>',
            '<p>',
        ]
        for candidate in candidates:
            class_name = candidate.__class__.__name__
            parts.append('<a href="http://localhost:8000/kernel/' + class_name
                         + '/">' + class_name + '</a>' + rn)
        parts.append('</p>')
        return HttpResponse(''.join(parts))

    safe_name = escape(collector_name)
    collectors_matched = collectors.find_collector(collector_name)

    if len(collectors_matched) == 0:
        return selection_page('Cannot find "' + safe_name + '"',
                              collectors.find_collector())

    if len(collectors_matched) > 1:
        heading = (str(len(collectors_matched))
                   + ' collectors have been found like "' + safe_name + '"')
        return selection_page(heading, collectors_matched)

    days = int(time_filter)
    time_label = 'today'
    if days > 1:
        time_label = 'in the last %d days' % days

    collector = collectors_matched[0]
    branch = collector.__class__.__name__
    time_limit = timezone.now() - datetime.timedelta(days=days)
    recent = Object.objects.filter(branch=branch).filter(time__gt=time_limit)

    rows = []
    for entry in recent:
        safe_url = escape(entry.url)
        rows.append(
            '<tr><td>' + escape(entry.title)
            + '</td><td>' + entry.time.strftime('%Y-%m-%d %H:%M:%S')
            + '</td><td><a href="' + safe_url + '">' + safe_url + '</a>'
            + '</td></tr>'
        )

    if rows:
        search_result = (
            '<table border="1"><tr><th>Title</th><th>Time</th><th>URL</th></tr>'
            + ''.join(rows)
            + '</table>'
        )
    else:
        search_result = '<p>No update ' + time_label + ' !!!</p>'

    http_response = ('<h1>"' + branch
                     + '" has the following object(s) updated '
                     + time_label + ' :</h1>')
    http_response += search_result
    return HttpResponse(http_response)
Esempio n. 5
0
def json_response(request, collector):
    """Build the code/message/results dictionary for one collector's data.

    ``collector`` is a name passed to ``collectors.find_collector``; the
    ``key``, ``begin_time`` and ``end_time`` values are read from the query
    string.  On success ``json_dic`` holds the code and message of
    ``APIError(0)`` plus the collector's data under ``'results'``; when an
    ``APIError`` is raised along the way, only its code and message are
    stored.
    """
    json_dic = {}
    try:
        # Exactly one collector must match; anything else is APIError(1).
        all_collectors = collectors.find_collector(collector)
        if not len(all_collectors) == 1:
            raise APIError(1)

        # Presumably raises APIError for a bad key -- helper not visible here.
        key = request.GET.get('key', '')
        _check_api_key(key)

        # Each raw query value is replaced by whatever _check_time returns.
        begin_time = request.GET.get('begin_time', '')
        begin_time = _check_time(begin_time)
        end_time = request.GET.get('end_time', '')
        end_time = _check_time(end_time)

        data = all_collectors[0].data(request, begin_time, end_time)
        _check_data(data)

        # APIError(0) is used as the carrier of the success code and message.
        success = APIError(0)
        json_dic['code'] = success.code
        json_dic['message'] = success.message
        json_dic['results'] = data
    except APIError, e:
        json_dic['code'] = e.code
        json_dic['message'] = e.message
    # NOTE(review): the visible definition ends here without returning
    # json_dic or a response -- confirm whether the remainder was cut off.
Esempio n. 6
0
def restart(request, collector_name):
    """Restart the collector whose class name equals ``collector_name``.

    Collectors of the 'shopping' package are searched first, then those of
    the 'address' package; the first match is wrapped in ``Restart`` and
    run.  The client is then redirected to the referring page.

    Raises ``Http404`` when no collector has that class name, so the view
    never falls through and returns ``None``.
    """
    # Function-scope import keeps this block self-contained.
    from django.http import Http404

    items = collectors.find_collector(package='shopping')
    address = collectors.find_collector(package='address')

    for candidate in list(items) + list(address):
        if candidate.__class__.__name__ == collector_name:
            Restart(candidate).run()
            # Without a Referer header there is no page to go back to.
            # NOTE(review): '/' is an assumed fallback target -- adjust to
            # the project's preferred landing page.
            target = request.META.get('HTTP_REFERER', '/')
            return HttpResponseRedirect(target)

    raise Http404('Unknown collector: %s' % collector_name)
Esempio n. 7
0
    def handle(self, *args, **options):
        """Print statistics and recent objects for one collector.

        The first positional argument is the collector name.  When it is
        missing, matches nothing, or matches several collectors, the
        candidate class names are printed and the command stops.  Otherwise
        the total number of ``Object`` rows for the collector is printed,
        followed by the number found within the last 24 hours, and
        ``dump()`` is called on at most 100 of those recent objects.
        """
        def list_collectors(heading, candidates):
            # Print a heading, a blank line, then one class name per line.
            print(heading)
            print('')
            for candidate in candidates:
                print(candidate.__class__.__name__)

        if not args:
            list_collectors('select a collector below:',
                            collectors.find_collector())
            return

        result = collectors.find_collector(args[0])
        if len(result) == 0:
            list_collectors('no collectors found:',
                            collectors.find_collector())
            return

        if len(result) > 1:
            list_collectors('please select only one collector', result)
            return

        collector = result[0]
        branch = collector.__class__.__name__
        print(branch)

        objects = Object.objects.filter(branch=branch)
        # count() lets the database do the counting; len() on the queryset
        # would load every row into memory just to print a number.
        print('total objects: %d' % objects.count())

        # NOTE(review): this is a naive local timestamp, while other code for
        # the same model uses timezone.now() -- confirm which one is wanted.
        yesterday = datetime.datetime.today() - datetime.timedelta(days=1)
        today_objects = objects.filter(time__gt=yesterday)
        print('objects found later than %s: %d'
              % (yesterday.strftime('%Y-%m-%d'), today_objects.count()))

        # Only the first 100 recent objects are dumped.
        for obj in today_objects[:100]:
            obj.dump()
Esempio n. 8
0
def brand_history(request, action):
    """Render one day of item images per shopping brand.

    ``page`` comes from the query string and ``action`` is 'back' or 'next':

    * 'back' shows the day between ``page + 2`` and ``page + 1`` days ago
      and reports ``page + 1`` to the template;
    * 'next' shows the day between ``page`` and ``page - 1`` days ago (with
      no upper bound when ``page`` is 1) and reports ``page - 1``.

    A missing or non-integer ``page`` raises from the lookup/``int()`` call,
    as before.  Any other ``action`` raises ``Http404``.
    """
    # Function-scope import keeps this block self-contained.
    from django.http import Http404

    page = int(request.GET['page'], 10)

    # Resolve the time window once, so every brand is filtered against the
    # same instant and an unknown action is rejected before any query runs
    # (previously `items` was simply left undefined in that case).
    now = timezone.now()
    if action == 'back':
        window = {
            'time__gt': now - datetime.timedelta(days=page + 2),
            'time__lte': now - datetime.timedelta(days=page + 1),
        }
        shown_page = page + 1
    elif action == 'next':
        window = {'time__gt': now - datetime.timedelta(days=page)}
        if page != 1:
            window['time__lte'] = now - datetime.timedelta(days=page - 1)
        shown_page = page - 1
    else:
        raise Http404('Unknown action: %s' % action)

    brands_info = []
    for collector in collectors.find_collector(package="shopping"):
        brand_name = collector.__class__.__name__
        items = Object.objects.filter(branch=brand_name).filter(**window)

        # Collect the value of every 'image_url' attribute of every item.
        items_image = []
        for item in items:
            items_image.extend(
                attribute.value
                for attribute in item.attribute_set.filter(name="image_url")
            )

        brands_info.append({
            'brand_name': brand_name,
            'count': len(items_image),
            'items_image': items_image,
        })

    return render_to_response(
        'brand_history.html',
        {'brands_info': brands_info, 'page': shown_page},
    )
Esempio n. 9
0
def init_config(request):
    """Return a JSON list describing every shopping brand and its stores.

    Collectors of the 'shopping' package whose class name contains
    'Address' are left out.  For each remaining collector the entry holds
    the class name, the display name, a logo path built from the lower-cased
    display name, and the 'storeaddr' attribute value of every object that
    carries a 'brand' attribute equal to the class name.
    """
    brand_collectors = [
        candidate
        for candidate in collectors.find_collector(package="shopping")
        if 'Address' not in candidate.__class__.__name__
    ]

    all_stores = Attribute.objects.filter(name='storeaddr')
    shop_info = []

    for collector in brand_collectors:
        collector_cls = collector.__class__
        # The display name is read from a freshly created instance.
        display_name = collector_cls().display_name
        logo_url = "static/images/" + display_name.lower() + "_logo.png"

        brand_attrs = Attribute.objects.filter(
            name='brand', value=collector_cls.__name__)
        # get() expects exactly one 'storeaddr' attribute per object.
        store_address = [
            all_stores.get(object=brand_attr.object).value
            for brand_attr in brand_attrs
        ]

        shop_info.append({
            "collector": collector_cls.__name__,
            "display_name": display_name,
            "logo_url": logo_url,
            "store_address": store_address,
        })

    return HttpResponse(json.dumps(shop_info, ensure_ascii=False))
Esempio n. 10
0
def json_api(request):
    """Serve recent objects of the shopping collectors as JSON.

    Query parameters:
      key          must equal 'timeline', otherwise head code 3 is returned.
      c            repeatable collector name; head code 2 when none of the
                   given names matches.  All shopping collectors when absent.
      prev_update  timestamp in '%Y%m%d%H%M%S' form; defaults to now.  A
                   value in the future yields head code 4.
      init         when present, the last 14 days are returned.

    The effective start time is never older than 14 days.  At most 10
    objects are returned per collector, each with title, time, url and all
    of its attributes.
    """
    params = request.GET.copy()
    payload = get_json_return_head(0)

    def as_response(body):
        # Every exit path serialises its body the same way.
        return HttpResponse(json.dumps(body, ensure_ascii=False))

    # API key check.
    if params.get('key') != 'timeline':
        return as_response(get_json_return_head(3))

    # Resolve the requested collectors.
    requested_names = params.getlist('c')
    if requested_names:
        found = []
        for requested_name in requested_names:
            found.extend(collectors.find_collector(name=requested_name,
                                                   package='shopping'))
        selected = list(set(found))
        if not selected:
            return as_response(get_json_return_head(2))
    else:
        selected = collectors.find_collector(package='shopping')

    # Resolve the start time.
    since = timezone.now()
    raw_time = params.get('prev_update')
    if raw_time:
        since = datetime.datetime.strptime(raw_time, '%Y%m%d%H%M%S')

    # A start time that has not arrived yet is rejected.
    if since > timezone.now():
        return as_response(get_json_return_head(4))
    payload['previous_update_time'] = since.strftime('%Y-%m-%d %H:%M:%S')

    # An init request, or a start time older than the window, is moved to
    # the start of the 14-day window.
    window = datetime.timedelta(days=14)
    if params.get('init') or since < timezone.now() - window:
        since = timezone.now() - window

    # Gather the objects.
    info = {}
    for collector in selected:
        branch = collector.__class__.__name__
        entries = info[branch] = []
        recent = Object.objects.filter(branch=branch).filter(time__gt=since)

        # For testing, only 10 objects are returned per collector.
        for record in recent[:10]:
            record_value = {}
            record_value['title'] = record.title
            record_value['time'] = record.time.strftime('%Y-%m-%d %H:%M:%S')
            record_value['url'] = record.url
            for attribute in Attribute.objects.filter(object=record):
                record_value[attribute.name] = attribute.value
            entries.append(record_value)

    payload['content'] = info
    return as_response(payload)
Esempio n. 11
0
def _count_error_lines(path):
    """Return how many lines of the file at ``path`` start with '[ERROR]'.

    Returns the string 'empty_logo' instead of a number when the file has
    no lines at all.
    """
    with open(path, mode='r') as handle:
        lines = handle.readlines()
    if not lines:
        return 'empty_logo'
    return sum(1 for line in lines if line.startswith('[ERROR]'))


def logo_info(request):
    """Render per-collector log statistics for one day.

    The directories directly under ``logo_dir`` are sorted by name and the
    ``page``-th newest one is shown (``page`` comes from the query string
    and defaults to 1 when missing or not an integer).  For every shopping
    or address collector that has a ``<ClassName>.txt`` file in that
    directory, the template receives the error-line count of the file and
    the number of rows created on that day; shopping collectors also get
    their total row count.

    A ``page`` outside ``1..number of directories`` returns a short plain
    response instead of the rendered page.
    """
    try:
        page = int(request.GET['page'], 10)
    except (KeyError, ValueError):
        page = 1

    # Only the directory names directly under logo_dir are needed, so the
    # first os.walk() entry is enough; a missing directory yields no names.
    all_times = sorted(next(os.walk(logo_dir), (None, [], None))[1])

    # Negative indexing would silently wrap around for page <= 0, so the
    # range is checked explicitly.
    if page < 1 or page > len(all_times):
        return HttpResponse(u'page超出范围')
    day_name = all_times[-page]

    day_dir = os.path.join(logo_dir, day_name)
    items = collectors.find_collector(package='shopping')
    address = collectors.find_collector(package='address')

    # Directory names are parsed as dates in '%Y_%m_%d' form; the day runs
    # from that date (exclusive) to one day later (inclusive).
    realtime = datetime.datetime.strptime(day_name, '%Y_%m_%d')
    time_up = realtime + datetime.timedelta(days=1)

    item_logos = {}
    for item in items:
        display_name = item.__class__().display_name
        item_name = item.__class__.__name__
        log_path = os.path.join(day_dir, item_name + '.txt')
        if not os.path.isfile(log_path):
            continue

        # Order matters to the template: errors, new that day, total.
        item_logos[item_name] = [
            _count_error_lines(log_path),
            models.Item.objects.filter(
                birth_time__gt=realtime,
                birth_time__lte=time_up,
                brand__display_name=display_name).count(),
            models.Item.objects.filter(
                brand__display_name=display_name).count(),
        ]

    brand_logos = {}
    for addr in address:
        display_name = addr.__class__().display_name
        addr_name = addr.__class__.__name__
        log_path = os.path.join(day_dir, addr_name + '.txt')
        if not os.path.isfile(log_path):
            continue

        # Order matters to the template: errors, new that day.
        brand_logos[addr_name] = [
            _count_error_lines(log_path),
            models.Shop.objects.filter(
                birth_time__gt=realtime,
                birth_time__lte=time_up,
                brand__display_name=display_name).count(),
        ]

    return render_to_response('logo_info.html', {
        'item_logos': item_logos,
        'brand_logos': brand_logos,
        'time': day_name,
        'page': page,
    })
Esempio n. 12
0
def save(time, from_time=None):
    """Write a JSON snapshot of brands, items, shops and activities to disk.

    Without ``from_time`` a gzip-compressed ``init_<YYYYMMDD>`` file (date
    taken from ``time``) is written under ``DATA_ROOT``.  With ``from_time``
    brands and shops are restricted to those born on that date and a plain
    ``delta_<YYYYMMDD>`` file is written instead.

    Items are chosen per shopping collector: those born in the three days
    before ``time``; when there are none, the day directories under
    ``logo_dir`` (newest first, skipping the three newest) are tried one by
    one until a day with items for that brand is found.  Only activities
    whose ``endTime`` is not before ``time`` are included.
    """
    file_name = DATA_ROOT + '/init_%s' % time.strftime('%Y%m%d')

    brands = models.Brand.objects.filter(death_time__isnull=True)
    items = models.Item.objects.filter(death_time__isnull=True)
    shops = models.Shop.objects.filter(death_time__isnull=True)

    item_uptime = time
    item_downtime = item_uptime - one_day * 3
    activities = models.Activity.objects.all().filter(endTime__gte=item_uptime)

    # Names of the directories directly under logo_dir, newest first.  A
    # missing or empty logo_dir simply means there are no fallback days.
    all_times = sorted(next(os.walk(logo_dir), (None, [], None))[1],
                       reverse=True)

    items_results = []
    for collector in collectors.find_collector(package='shopping'):
        brand_name = collector.__class__().display_name
        items_for_collector = items.filter(
            birth_time__gte=item_downtime,
            birth_time__lt=item_uptime,
            brand__display_name=brand_name)

        if not items_for_collector:
            # Nothing recent for this brand: walk back through older days.
            # The loop variable is deliberately not called `time`, which
            # would overwrite the parameter.
            for day_name in all_times[3:]:
                up_time = datetime.datetime.strptime(day_name, '%Y_%m_%d')
                down_time = up_time - one_day
                items_for_collector = items.filter(
                    birth_time__gte=down_time,
                    birth_time__lt=up_time,
                    brand__display_name=brand_name)
                if items_for_collector:
                    break

        items_results.extend(items_for_collector)

    if from_time:
        begin = from_time.date()
        end = begin + one_day
        brands = brands.filter(birth_time__gte=begin, birth_time__lt=end)
        shops = shops.filter(birth_time__gte=begin, birth_time__lt=end)
        file_name = DATA_ROOT + '/delta_%s' % from_time.strftime('%Y%m%d')

    brands = brands.order_by('priority')
    result = {
        'brands': [brand.dict() for brand in brands],
        'items': [item.dict() for item in items_results],
        'shops': [shop.dict() for shop in shops],
        'activities': [activity.dict() for activity in activities],
    }

    def encode_extra(obj):
        # datetimes become ISO strings; any other non-JSON value becomes
        # null, exactly as before.
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return None

    response = json.dumps(result, default=encode_extra)

    # Delta files are plain text, full snapshots are gzip-compressed.
    opener = open if from_time else gzip.open
    output = opener(file_name, mode='w')
    try:
        output.write(response)
    finally:
        # Close the file even when the write fails.
        output.close()