def handle(self, *args, **options):
    """Run the collector(s) selected on the command line.

    The first positional argument, when present, is the collector name passed
    to ``collectors.find_collector``; ``options['package']`` is forwarded as
    the ``package`` keyword.

    If more than one collector matches while ``package`` equals ``False``, or
    if nothing matches, the candidate collector class names are printed and
    nothing is run.  Otherwise ``fetch()`` is called on every match; when a
    collector raises, its traceback is printed to stdout and the remaining
    collectors still run.
    """
    name = args[0] if args else ''
    package = options['package']
    matches = collectors.find_collector(name, package=package)

    candidates = None
    # Kept as an equality test (not ``not package``) so that a ``None``
    # option value is not treated like ``False``.
    if len(matches) > 1 and package == False:  # noqa: E712
        candidates = matches
    elif len(matches) == 0:
        print('no match collector')
        candidates = collectors.find_collector()

    if candidates is not None:
        # Single-argument parenthesised prints behave the same as the
        # Python 2 print statement and also parse under Python 3.
        print('Please choose only one collector from below to run.')
        print('')
        for collector in candidates:
            print(collector.__class__.__name__)
        return

    for collector in matches:
        try:
            collector.fetch()
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the command.
            traceback.print_exc(file=sys.stdout)
def handle(self, *args, **options):
    """Announce every shopping brand, then run all shopping and address collectors.

    For each collector in the ``'shopping'`` package a ``brand_found`` signal
    is sent carrying the collector's class name, a URL and logo path derived
    from that name, its ``display_name`` and priority ``1``.  Afterwards
    ``fetch()`` is called on every shopping collector and then on every
    collector in the ``'address'`` package.  A collector that raises has the
    traceback written to its own ``logger`` and to stdout, and the run
    continues with the next collector.
    """
    print('LoveShopping collector run!')

    def fetch_all(batch):
        # Best-effort run: one failing collector must not stop the others.
        for collector in batch:
            try:
                collector.fetch()
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit can still abort the run.
                collector.logger.error(traceback.format_exc())
                traceback.print_exc(file=sys.stdout)

    shopping_collectors = collectors.find_collector(package='shopping')
    for collector in shopping_collectors:
        name = collector.__class__.__name__
        signals.brand_found.send(
            self,
            name=name,
            url='http://' + name,
            display_name=collector.display_name,
            priority=1,
            logo='static/images/%s_logo.png' % name,
        )

    fetch_all(shopping_collectors)
    fetch_all(collectors.find_collector(package='address'))
    print('Succeed!')
def handle(self, *args, **options):
    """Call ``init()`` on the single collector matching the given name.

    The first positional argument (empty string when absent) is looked up
    with ``collectors.find_collector``.  Exactly one match is initialised;
    otherwise the candidate class names are printed instead: the ambiguous
    matches, or every known collector when nothing matched.
    """
    name = args[0] if args else ''
    matches = collectors.find_collector(name)

    if len(matches) == 1:
        matches[0].init()
        return

    if len(matches) == 0:
        print('no match collector')
        # Nothing matched: offer the complete list instead.
        matches = collectors.find_collector()

    # Single-argument parenthesised prints are equivalent to the Python 2
    # print statement; print('') emits the same blank line as a bare print.
    print('Please choose only one collector from below to run.')
    print('')
    for candidate in matches:
        print(candidate.__class__.__name__)
def view_someday(request, collector_name, time_filter=1):
    """Render an HTML page of the objects a collector stored recently.

    ``collector_name`` is resolved with ``collectors.find_collector``.  When
    it matches no collector or more than one, the response is a page of links
    to the candidate collectors.  With exactly one match the response lists
    the ``Object`` rows whose ``branch`` is the collector's class name and
    whose ``time`` is within the last ``time_filter`` days (converted with
    ``int``), as a table of title, time and URL.

    ``collector_name`` and the stored titles/URLs are HTML-escaped before
    being placed in the markup, because they originate outside this code.
    """
    try:
        from html import escape as _escape  # Python 3
    except ImportError:
        from cgi import escape as _escape  # Python 2

    def esc(text):
        # quote=True so the value is also safe inside a double-quoted attribute.
        return _escape(text, True)

    def chooser_page(heading, candidates):
        # Shared layout for the "no match" and "several matches" answers.
        parts = [
            '<h1>', heading, '<br>',
            'Please select one from below:', rn, '</h1>',
            '<p>',
        ]
        for candidate in candidates:
            class_name = candidate.__class__.__name__
            parts.append(
                '<a href="http://localhost:8000/kernel/' + class_name + '/">'
                + class_name + '</a>'
            )
            parts.append(rn)
        parts.append('</p>')
        return HttpResponse(''.join(parts))

    collectors_matched = collectors.find_collector(collector_name)
    if len(collectors_matched) == 0:
        return chooser_page(
            'Cannot find "' + esc(collector_name) + '"',
            collectors.find_collector(),
        )
    if len(collectors_matched) > 1:
        return chooser_page(
            str(len(collectors_matched))
            + ' collectors have been found like "' + esc(collector_name) + '"',
            collectors_matched,
        )

    days = int(time_filter)
    time_label = 'today'
    if days > 1:
        time_label = 'in the last %d days' % days

    class_name = collectors_matched[0].__class__.__name__
    time_limit = timezone.now() - datetime.timedelta(days=days)
    recent = Object.objects.filter(branch=class_name).filter(time__gt=time_limit)

    rows = [
        '<tr><td>' + esc(obj.title)
        + '</td><td>' + obj.time.strftime('%Y-%m-%d %H:%M:%S')
        + '</td><td><a href="' + esc(obj.url) + '">' + esc(obj.url) + '</a>'
        + '</td></tr>'
        for obj in recent
    ]
    if rows:
        search_result = (
            '<table border="1"><tr><th>Title</th><th>Time</th><th>URL</th></tr>'
            + ''.join(rows)
            + '</table>'
        )
    else:
        search_result = '<p>No update ' + time_label + ' !!!</p>'

    http_response = (
        '<h1>"' + class_name + '" has the following object(s) updated '
        + time_label + ' :</h1>'
    )
    return HttpResponse(http_response + search_result)
def json_response(request, collector):
    """Collect a collector's data, or an API error, into a result dict.

    Steps, in order: resolve ``collector`` to exactly one collector (else
    ``APIError(1)``), validate the ``key`` query parameter, validate
    ``begin_time`` and ``end_time``, ask the collector for its data and
    validate that data.  On success the dict holds the ``code`` and
    ``message`` of ``APIError(0)`` plus ``results``; when any step raises
    ``APIError`` it holds that error's ``code`` and ``message``.
    """
    json_dic = {}
    try:
        matches = collectors.find_collector(collector)
        if len(matches) != 1:
            raise APIError(1)

        _check_api_key(request.GET.get('key', ''))
        begin_time = _check_time(request.GET.get('begin_time', ''))
        end_time = _check_time(request.GET.get('end_time', ''))

        data = matches[0].data(request, begin_time, end_time)
        _check_data(data)

        # APIError(0) is used as the carrier of the success code/message.
        success = APIError(0)
        json_dic['code'] = success.code
        json_dic['message'] = success.message
        json_dic['results'] = data
    except APIError as e:
        json_dic['code'] = e.code
        json_dic['message'] = e.message
    # NOTE(review): no return statement is visible for this function here, so
    # as shown it returns None -- confirm where json_dic is turned into a
    # response.
def restart(request, collector_name):
    """Restart every collector whose class name equals ``collector_name``.

    Collectors of the ``'shopping'`` package are checked first, then those of
    the ``'address'`` package.  Each match is wrapped in ``Restart`` and run;
    if the request carries an ``HTTP_REFERER`` header the view redirects back
    to it right after the first restart.
    """
    shopping = collectors.find_collector(package='shopping')
    address = collectors.find_collector(package='address')

    for group in (shopping, address):
        for candidate in group:
            if candidate.__class__.__name__ != collector_name:
                continue
            Restart(candidate).run()
            if 'HTTP_REFERER' in request.META:
                return HttpResponseRedirect(request.META['HTTP_REFERER'])
    # NOTE(review): when nothing matches, or the request has no referer, the
    # function falls off the end and returns None -- confirm that is intended.
def handle(self, *args, **options):
    """Print statistics for one collector and dump its most recent objects.

    Requires the collector name as the first positional argument.  Without
    it, or when the name matches zero or several collectors, the candidate
    class names are printed and the command stops.  For a unique match it
    prints the class name, the total number of ``Object`` rows whose
    ``branch`` is that class name, the number of those newer than 24 hours
    ago, and then calls ``dump()`` on at most 100 of the newer rows.
    """
    def list_names(found):
        for candidate in found:
            print(candidate.__class__.__name__)

    # Single-argument parenthesised prints behave the same as the Python 2
    # print statement and also parse under Python 3.
    if len(args) == 0:
        print('select a collector below:')
        print('')
        list_names(collectors.find_collector())
        return

    result = collectors.find_collector(args[0])
    if len(result) == 0:
        print('no collectors found:')
        print('')
        list_names(collectors.find_collector())
        return
    if len(result) > 1:
        print('please select only one collector')
        print('')
        list_names(result)
        return

    branch = result[0].__class__.__name__
    print(branch)

    objects = Object.objects.filter(branch=branch)
    # count() asks the database for the number instead of loading every row
    # into memory just to measure the length.
    print('total objects: %d' % objects.count())

    yesterday = datetime.datetime.today() - datetime.timedelta(days=1)
    today_objects = objects.filter(time__gt=yesterday)
    print('objects found later than %s: %d'
          % (yesterday.strftime('%Y-%m-%d'), today_objects.count()))

    # Only the first 100 rows are fetched and dumped.
    for obj in today_objects[:100]:
        obj.dump()
def brand_history(request, action):
    """Render one day-sized slice of image URLs for every shopping brand.

    ``page`` is read from the query string as a base-10 integer and
    ``action`` selects the direction:

    * ``'back'``  -- objects between ``page + 2`` and ``page + 1`` days ago;
      the page passed to the template is ``page + 1``.
    * ``'next'``  -- objects between ``page`` and ``page - 1`` days ago (for
      ``page == 1`` simply everything newer than one day); the page passed to
      the template is ``page - 1``.

    For each collector of the ``'shopping'`` package the template receives
    its class name, the ``image_url`` attribute values of the selected
    objects and their count.
    """
    page = int(request.GET['page'], 10)

    # Day offsets of the window, counted back from "now".  ``newer_days`` is
    # None when the window has no upper bound.  Deliberately no ``else``: as
    # before, any other action only fails once a collector is processed.
    if action == 'back':
        newer_days, older_days = page + 1, page + 2
    elif action == 'next':
        newer_days = None if page == 1 else page - 1
        older_days = page

    brands_info = []
    for collector in collectors.find_collector(package="shopping"):
        brand_name = collector.__class__.__name__
        objects = Object.objects.filter(branch=brand_name)

        window = {}
        if newer_days is not None:
            window['time__lte'] = timezone.now() - datetime.timedelta(days=newer_days)
        window['time__gt'] = timezone.now() - datetime.timedelta(days=older_days)
        items = objects.filter(**window)

        items_image = [
            attribute.value
            for item in items
            for attribute in item.attribute_set.filter(name="image_url")
        ]
        brands_info.append({
            'brand_name': brand_name,
            'count': len(items_image),
            'items_image': items_image,
        })

    page_step = {'back': 1, 'next': -1}.get(action, 0)
    return render_to_response(
        'brand_history.html',
        {'brands_info': brands_info, 'page': page + page_step},
    )
def init_config(request):
    """Return a JSON list describing every shopping brand and its stores.

    Collectors of the ``'shopping'`` package whose class name contains
    ``'Address'`` are left out.  For each remaining collector the entry holds
    the class name, the ``display_name`` of a fresh instance of that class, a
    logo path built from the lower-cased display name, and the ``storeaddr``
    attribute values of all objects that carry a ``brand`` attribute equal to
    the class name.
    """
    brand_collectors = [
        collector
        for collector in collectors.find_collector(package="shopping")
        # A name without 'Address' cannot end in 'AddressCollector' either,
        # so this single test covers both original filters.
        if 'Address' not in collector.__class__.__name__
    ]

    all_stores = Attribute.objects.filter(name='storeaddr')
    shop_info = []
    for collector in brand_collectors:
        collector_class = collector.__class__
        display_name = collector_class().display_name
        logo_url = "static/images/" + display_name.lower() + "_logo.png"

        brand_attrs = Attribute.objects.filter(
            name='brand', value=collector_class.__name__)
        store_address = [
            all_stores.get(object=attr.object).value for attr in brand_attrs
        ]

        shop_info.append({
            "collector": collector_class.__name__,
            "display_name": display_name,
            "logo_url": logo_url,
            "store_address": store_address,
        })

    return HttpResponse(json.dumps(shop_info, ensure_ascii=False))
def json_api(request):
    """Answer the timeline API: recent objects per shopping collector as JSON.

    Query parameters:

    * ``key``         -- must equal ``'timeline'``; otherwise head code 3.
    * ``c``           -- repeatable collector name; when given, only matching
      shopping collectors are used (head code 2 if none match), otherwise all
      shopping collectors.
    * ``prev_update`` -- ``%Y%m%d%H%M%S`` timestamp; a value later than now
      yields head code 4.  Defaults to now.
    * ``init``        -- when set, the lower time bound is 14 days ago.

    The lower time bound is never older than 14 days.  For each collector at
    most 10 objects newer than the bound are returned, each with title, time,
    url and all of its attributes, under the ``content`` key.
    """
    params = request.GET.copy()
    json_return = get_json_return_head(0)

    def respond(payload):
        return HttpResponse(json.dumps(payload, ensure_ascii=False))

    # Check the API key (a missing or empty key also differs from 'timeline').
    if params.get('key') != 'timeline':
        return respond(get_json_return_head(3))

    # Check the collectors.
    requested_names = params.getlist('c')
    if requested_names:
        collectors_required = []
        for requested in requested_names:
            collectors_required = collectors_required + collectors.find_collector(
                name=requested, package='shopping')
        # Drop collectors that were matched by more than one name.
        collectors_required = list(set(collectors_required))
        if not collectors_required:
            return respond(get_json_return_head(2))
    else:
        collectors_required = collectors.find_collector(package='shopping')

    # Check the time.
    time_required = timezone.now()
    prev_update = params.get('prev_update')
    if prev_update:
        time_required = datetime.datetime.strptime(prev_update, '%Y%m%d%H%M%S')
        # A time that has not arrived yet is rejected.
        if time_required > timezone.now():
            return respond(get_json_return_head(4))
    json_return['previous_update_time'] = time_required.strftime('%Y-%m-%d %H:%M:%S')

    # With ``init`` -- or a bound older than the limit -- fall back to the
    # last TIME_FILTER days.
    TIME_FILTER = 14
    oldest_allowed = timezone.now() - datetime.timedelta(days=TIME_FILTER)
    if params.get('init') or time_required < oldest_allowed:
        time_required = oldest_allowed

    # Gather the information.
    info = {}
    for collector in collectors_required:
        branch = collector.__class__.__name__
        entries = info[branch] = []
        recent = Object.objects.filter(branch=branch).filter(time__gt=time_required)
        # For test, only return 10 objects per collector.
        for obj in recent[:10]:
            object_value = {
                'title': obj.title,
                'time': obj.time.strftime('%Y-%m-%d %H:%M:%S'),
                'url': obj.url,
            }
            # Attributes are applied last, so one named like a fixed field
            # replaces that field.
            for attribute in Attribute.objects.filter(object=obj):
                object_value[attribute.name] = attribute.value
            entries.append(object_value)

    json_return['content'] = info
    return respond(json_return)
def _count_log_errors(path):
    """Return how many lines of the log file at *path* start with ``[ERROR]``.

    Returns the string ``'empty_logo'`` instead of a number when the file
    contains no lines at all.
    """
    with open(path, mode='r') as log_file:
        lines = log_file.readlines()
    if not lines:
        return 'empty_logo'
    return sum(1 for line in lines if line.startswith('[ERROR]'))


def logo_info(request):
    """Render per-collector log statistics for one day of collector logs.

    The sub-directories of ``logo_dir`` are sorted by name and the ``page``
    query parameter (default 1) selects one counted from the end; its name is
    parsed as ``%Y_%m_%d``.  For every shopping collector that has a
    ``<ClassName>.txt`` file in that directory the template receives
    ``[error_count, new_items, all_items]``, and for every address collector
    ``[error_count, new_shops]``, where the "new" counts cover rows whose
    ``birth_time`` falls in the 24 hours following that date.

    Returns a plain "page out of range" response when ``page`` does not
    select an existing directory.
    """
    try:
        page = int(request.GET['page'], 10)
    except (KeyError, ValueError, TypeError):
        page = 1

    # Only the immediate sub-directories of logo_dir are needed, so take the
    # first os.walk entry instead of traversing the whole tree.  A missing or
    # empty directory yields an empty list (and thus "page out of range").
    day_names = sorted(next(os.walk(logo_dir), (None, [], []))[1])
    try:
        day_name = day_names[-page]
    except IndexError:
        return HttpResponse(u'page超出范围')

    day_dir = os.path.join(logo_dir, day_name)
    items = collectors.find_collector(package='shopping')
    address = collectors.find_collector(package='address')
    realtime = datetime.datetime.strptime(day_name, '%Y_%m_%d')
    time_up = realtime + datetime.timedelta(days=1)

    # NOTE(review): collectors without a log file for the day are skipped
    # entirely -- this nesting was reconstructed from flattened source;
    # confirm against the original layout.
    item_logos = {}
    for item in items:
        display_name = item.__class__().display_name
        item_name = item.__class__.__name__
        log_path = os.path.join(day_dir, item_name + '.txt')
        if not os.path.isfile(log_path):
            continue
        error_count = _count_log_errors(log_path)
        new_count = models.Item.objects.filter(
            birth_time__gt=realtime,
            birth_time__lte=time_up,
            brand__display_name=display_name).count()
        all_count = models.Item.objects.filter(
            brand__display_name=display_name).count()
        item_logos[item_name] = [error_count, new_count, all_count]

    brand_logos = {}
    for addr in address:
        display_name = addr.__class__().display_name
        addr_name = addr.__class__.__name__
        log_path = os.path.join(day_dir, addr_name + '.txt')
        if not os.path.isfile(log_path):
            continue
        error_count = _count_log_errors(log_path)
        new_count = models.Shop.objects.filter(
            birth_time__gt=realtime,
            birth_time__lte=time_up,
            brand__display_name=display_name).count()
        brand_logos[addr_name] = [error_count, new_count]

    return render_to_response('logo_info.html', {
        'item_logos': item_logos,
        'brand_logos': brand_logos,
        'time': day_name,
        'page': page,
    })
def save(time, from_time=None):
    """Serialise the current brands, items, shops and activities to a file.

    Without ``from_time`` the JSON is written gzip-compressed to
    ``DATA_ROOT/init_<YYYYMMDD of time>``.  With ``from_time`` brands and
    shops are restricted to those born on ``from_time``'s date and the JSON
    is written uncompressed to ``DATA_ROOT/delta_<YYYYMMDD of from_time>``.

    Only rows with no ``death_time`` are considered, and only activities
    whose ``endTime`` is not before ``time``.  Items are taken per shopping
    collector from the three days before ``time``; when a collector has none
    in that window, earlier days -- named after the sub-directories of
    ``logo_dir``, newest first, skipping the first three -- are tried one by
    one until a day with items is found.
    """
    file_name = DATA_ROOT + '/init_%s' % time.strftime('%Y%m%d')
    brands = models.Brand.objects.filter(death_time__isnull=True)
    live_items = models.Item.objects.filter(death_time__isnull=True)
    shops = models.Shop.objects.filter(death_time__isnull=True)

    item_uptime = time
    item_downtime = item_uptime - one_day * 3
    activities = models.Activity.objects.all().filter(endTime__gte=item_uptime)

    # Only the immediate sub-directories of logo_dir are needed, so take the
    # first os.walk entry instead of traversing the whole tree; a missing or
    # empty directory simply gives no fallback days.
    day_names = sorted(next(os.walk(logo_dir), (None, [], []))[1], reverse=True)

    items = []
    for collector in collectors.find_collector(package='shopping'):
        brand_name = collector.__class__().display_name
        found = live_items.filter(
            birth_time__gte=item_downtime,
            birth_time__lt=item_uptime,
            brand__display_name=brand_name)
        if len(found) == 0:
            # Loop variable deliberately not called ``time`` so the parameter
            # is no longer shadowed.
            for day_name in day_names[3:]:
                up_time = datetime.datetime.strptime(day_name, '%Y_%m_%d')
                down_time = up_time - one_day
                found = live_items.filter(
                    birth_time__gte=down_time,
                    birth_time__lt=up_time,
                    brand__display_name=brand_name)
                if len(found) != 0:
                    break
        items.extend(found)

    if from_time:
        begin = from_time.date()
        end = begin + one_day
        brands = brands.filter(birth_time__gte=begin, birth_time__lt=end)
        shops = shops.filter(birth_time__gte=begin, birth_time__lt=end)
        file_name = DATA_ROOT + '/delta_%s' % from_time.strftime('%Y%m%d')

    brands = brands.order_by('priority')
    result = {
        'brands': [brand.dict() for brand in brands],
        'items': [item.dict() for item in items],
        'shops': [shop.dict() for shop in shops],
        'activities': [activity.dict() for activity in activities],
    }

    def dthandler(obj):
        # datetimes become ISO strings; anything else unknown to json -> null.
        return obj.isoformat() if isinstance(obj, datetime.datetime) else None

    response = json.dumps(result, default=dthandler)

    opener = open if from_time else gzip.open
    out = opener(file_name, mode='w')
    try:
        out.write(response)
    finally:
        # Close the handle even when the write fails.
        out.close()