Example #1
def rebuild_love_count():
    utc_dt = datetime.datetime.utcnow() - datetime.timedelta(
        days=7)  # rebuild last week and this week
    week_start, _ = utc_week_limits(utc_dt)

    set_toggle_state(LOVE_SENDING_ENABLED, False)

    logging.info('Deleting LoveCount table... {}MB'.format(
        memory_usage().current()))
    ndb.delete_multi(
        LoveCount.query(LoveCount.week_start >= week_start).fetch(
            keys_only=True))
    employee_dict = {employee.key: employee for employee in Employee.query()}
    logging.info('Rebuilding LoveCount table... {}MB'.format(
        memory_usage().current()))
    cursor = None
    count = 0
    while True:
        loves, cursor, has_more = Love.query(
            Love.timestamp >= week_start).fetch_page(500, start_cursor=cursor)
        for l in loves:
            LoveCount.update(l, employee_dict=employee_dict)
        count += len(loves)
        logging.info('Processed {} loves, {}MB'.format(
            count,
            memory_usage().current()))
        if not has_more:
            break
    logging.info('Done. {}MB'.format(memory_usage().current()))

    set_toggle_state(LOVE_SENDING_ENABLED, True)
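Note: utc_week_limits() is not included in this snippet. A minimal sketch of what such a helper might look like, assuming it returns a (week_start, week_end) pair and that weeks start on Monday at 00:00 UTC:

import datetime

def utc_week_limits(utc_dt):
    # Hypothetical helper (not from the original project): snap a UTC datetime
    # to the Monday 00:00 UTC that starts its week.
    week_start = (utc_dt - datetime.timedelta(days=utc_dt.weekday())).replace(
        hour=0, minute=0, second=0, microsecond=0)
    week_end = week_start + datetime.timedelta(days=7, seconds=-1)
    return week_start, week_end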
Example #2
def _index_employees(employees):
    logging.info('Indexing employees... {}MB'.format(memory_usage().current()))
    index = search.Index(name=INDEX_NAME)
    # According to appengine, put can handle a maximum of 200 documents,
    # and apparently batching is more efficient
    for chunk_of_200 in chunk(employees, 200):
        documents = []
        for employee in chunk_of_200:
            if employee is not None:
                # Gross hack to support prefix matching, see documentation for _generate_substrings
                substrings = u' '.join([
                    _generate_substrings(employee.first_name),
                    _generate_substrings(employee.last_name),
                    _generate_substrings(employee.username),
                ])
                doc = search.Document(fields=[
                    # Full name is already unicode
                    search.TextField(name='full_name',
                                     value=employee.full_name),
                    search.TextField(name='username',
                                     value=unicode(employee.username)),
                    search.TextField(name='substrings', value=substrings),
                ])
                documents.append(doc)
        index.put(documents)
    logging.info('Done indexing employees. {}MB'.format(
        memory_usage().current()))
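Note: the chunk() helper used above is not shown. A minimal sketch under the assumption that it simply yields consecutive slices of at most n items:

def chunk(iterable, n):
    # Hypothetical helper (assumed behaviour): yield lists of at most n items each.
    items = list(iterable)
    for i in range(0, len(items), n):
        yield items[i:i + n]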
Example #3
 def get(self):
     repo = self.request.get('repo')
     number = self.request.get('number')
     if self.request.get('format') == 'json':
         ancestor = models.GithubResource.make_key(repo, number)
         events = list(models.GithubWebhookRaw.query(ancestor=ancestor))
         self.response.headers['content-type'] = 'application/json'
         self.response.write(json.dumps([e.body for e in events], indent=True))
         return
     self.response.write(
         '<style>td pre{max-height:200px;max-width:800px;overflow:scroll}</style>')
     self.response.write('<p>Memory: %s' % memory_usage().current())
     self.emit_classified(repo, number)
     self.response.write('<p>Memory: %s' % memory_usage().current())
     if self.request.get('classify_only'):
         return
     merged = self.emit_events(repo, number)
     self.response.write('<p>Memory: %s' % memory_usage().current())
     if 'head' in merged:
         sha = merged['head']['sha']
         results = models.GHStatus.query_for_sha(repo, sha)
         self.response.write('</table><table>')
         for res in results:
             self.response.write('<tr><td>%s<td>%s<td><a href="%s">%s</a>\n'
                % (res.context, res.state, res.target_url, res.description))
     models.shrink(merged)
     self.response.write('</table><pre>%s</pre>' % cgi.escape(
         json.dumps(merged, indent=2, sort_keys=True)))
     self.response.write('<p>Memory: %s' % memory_usage().current())
Example #4
def _clear_index():
    logging.info('Clearing index... {}MB'.format(memory_usage().current()))
    index = search.Index(name=INDEX_NAME)
    last_id = None
    while True:
        # We can batch up to 200 doc_ids in the delete call, and
        # batching is better according to the docs. Because we're deleting
        # async, we need to keep track of where we left off each time
        # we do get_range
        use_start_object = False
        if last_id is None:
            use_start_object = True
        doc_ids = [
            doc.doc_id for doc in index.get_range(
                ids_only=True,
                limit=200,
                start_id=last_id,
                include_start_object=use_start_object,
            )
        ]
        if not doc_ids:
            break
        last_id = doc_ids[-1]
        index.delete(doc_ids)

    logging.info('Done clearing index. {}MB'.format(memory_usage().current()))
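Since _clear_index() and _index_employees() (Example #2) appear to come from the same search module, a plausible (assumed) full-rebuild helper would simply chain them:

def rebuild_employee_index(employees):
    # Hypothetical glue code: drop every document, then re-index the given employees.
    _clear_index()
    _index_employees(employees)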
Example #5
    def post(self):
        active_subs = Subscription.get_active_subscriptions()

        items = json.loads(self.request.get('items'))
        logging.debug('before parsing, memory: %s' %
                      runtime.memory_usage().current())
        parser = RentParser()
        parsed_items = []

        for item in items:
            try:
                parsed = parser.parse(item)
                ret = RentRecord.add_record(parsed)
            except Exception as e:
                logging.error(repr(e))
                # Skip this item: without the continue, a failed parse would
                # append a stale (or undefined) parsed value below.
                continue

            parsed_items.append(parsed)

        logging.debug('after parsing, memory: %s' %
                      runtime.memory_usage().current())

        user2message = filter_items(parsed_items, active_subs)

        for user, item in user2message.items():
            logging.debug('user: %s has %d messages' % (user, len(item)))
            User.update_user_items(user, item)
            url = get_short_url(user)
            if not url:
                url = site_config.url + '?user=%s' % user
            # Message text (Chinese): "Found %d new rental listings." / "Click the link below to view them:"
            msg = [u'新找到%d条租房信息。' % len(item), u'点击以下链接查看:', url]

            messenger.send_message(user, '\n'.join(msg))
Example #6
    def post(self):
        active_subs = Subscription.get_active_subscriptions()

        items = json.loads(self.request.get('items'))
        logging.debug('before parsing, memory: %s' % runtime.memory_usage().current())
        parser = RentParser()
        parsed_items = []

        for item in items:
            try:
                parsed = parser.parse(item)
                ret = RentRecord.add_record(parsed)
            except Exception as e:
                logging.error(repr(e))
                # Skip this item: without the continue, a failed parse would
                # append a stale (or undefined) parsed value below.
                continue

            parsed_items.append(parsed)

        logging.debug('after parsing, memory: %s' % runtime.memory_usage().current())

        user2message = filter_items(parsed_items, active_subs)

        for user, item in user2message.items():
            logging.debug('user: %s has %d messages' % (user, len(item)))
            User.update_user_items(user, item)
            url = get_short_url(user)
            if not url:
                url = site_config.url + '?user=%s' % user
            # Message text (Chinese): "Found %d new rental listings." / "Click the link below to view them:"
            msg = [u'新找到%d条租房信息。' % len(item),
                   u'点击以下链接查看:',
                   url]

            messenger.send_message(user, '\n'.join(msg))
Example #7
 def wrapper(*args, **kwargs):
   logging.info('Memory before method %s is %s.',
                method.__name__, runtime.memory_usage().current())
   result = method(*args, **kwargs)
   logging.info('Memory after method %s is %s',
                method.__name__, runtime.memory_usage().current())
   return result
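Note: this snippet is only the inner wrapper; the enclosing decorator is not shown. A plausible reconstruction, assuming method is the function being decorated and log_memory is a hypothetical name for the decorator:

import functools
import logging

from google.appengine.api import runtime

def log_memory(method):
    # Hypothetical decorator built around the wrapper shown above: log instance
    # memory before and after each call of the decorated function.
    @functools.wraps(method)
    def wrapper(*args, **kwargs):
        logging.info('Memory before method %s is %s.',
                     method.__name__, runtime.memory_usage().current())
        result = method(*args, **kwargs)
        logging.info('Memory after method %s is %s.',
                     method.__name__, runtime.memory_usage().current())
        return result
    return wrapper

Usage would then be a plain @log_memory on any function whose memory footprint you want to track.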
Example #8
 def get(self):
     repo = self.request.get('repo')
     number = self.request.get('number')
     if self.request.get('format') == 'json':
         ancestor = models.GithubResource.make_key(repo, number)
         events = list(models.GithubWebhookRaw.query(ancestor=ancestor))
         self.response.headers['content-type'] = 'application/json'
         self.response.write(
             json.dumps([e.body for e in events], indent=True))
         return
     self.response.write(
         '<style>td pre{max-height:200px;max-width:800px;overflow:scroll}</style>'
     )
     self.response.write('<p>Memory: %s' % memory_usage().current())
     self.emit_classified(repo, number)
     self.response.write('<p>Memory: %s' % memory_usage().current())
     if self.request.get('classify_only'):
         return
     merged = self.emit_events(repo, number)
     self.response.write('<p>Memory: %s' % memory_usage().current())
     if 'head' in merged:
         sha = merged['head']['sha']
         results = models.GHStatus.query_for_sha(repo, sha)
         self.response.write('</table><table>')
         for res in results:
             self.response.write(
                 '<tr><td>%s<td>%s<td><a href="%s">%s</a>\n' %
                 (res.context, res.state, res.target_url, res.description))
     models.shrink(merged)
     self.response.write(
         '</table><pre>%s</pre>' %
         cgi.escape(json.dumps(merged, indent=2, sort_keys=True)))
     self.response.write('<p>Memory: %s' % memory_usage().current())
Example #9
 def wrapper(*args, **kwargs):
     logging.info('Memory before method %s is %s.', method.__name__,
                  runtime.memory_usage().current())
     result = method(*args, **kwargs)
     logging.info('Memory after method %s is %s', method.__name__,
                  runtime.memory_usage().current())
     return result
Example #10
def _update_employees(employee_dicts):
    """Given a JSON string in the format "[{employee info 1}, {employee info 2}, ...]",
    create new employee records and update existing records as necessary.

    Then determine whether any employees have been terminated since the last update,
    and mark these employees as such.
    """
    logging.info('Updating employees... {}MB'.format(memory_usage().current()))

    db_employee_dict = {
        employee.username: employee
        for employee in Employee.query()
    }

    all_employees, new_employees = [], []
    current_usernames = set()
    for d in employee_dicts:
        existing_employee = db_employee_dict.get(d['username'])
        if existing_employee is None:
            new_employee = Employee.create_from_dict(d, persist=False)
            all_employees.append(new_employee)
            new_employees.append(new_employee)
        else:
            existing_employee.update_from_dict(d)
            # If the user is in the S3 dump, then the user is no longer
            # terminated.
            existing_employee.terminated = False
            all_employees.append(existing_employee)

        current_usernames.add(d['username'])
        if len(all_employees) % 200 == 0:
            logging.info('Processed {} employees, {}MB'.format(
                len(all_employees),
                memory_usage().current()))
    ndb.put_multi(all_employees)

    # Figure out if there are any employees in the DB that aren't in the S3
    # dump. These are terminated employees, and we need to mark them as such.
    db_usernames = set(db_employee_dict.keys())

    terminated_usernames = db_usernames - current_usernames
    terminated_employees = []
    for username in terminated_usernames:
        employee = db_employee_dict[username]
        employee.terminated = True
        terminated_employees.append(employee)
    ndb.put_multi(terminated_employees)

    logging.info('Done updating employees. {}MB'.format(
        memory_usage().current()))
Example #11
def fashionista_register_shutdown_hook():
    apiproxy_stub_map.apiproxy.CancelApiCalls()

    # We can save state to the datastore here or log some statistics.
    logThis(AEL_LEVEL_WARNING, 'SHUTDOWN IN PROGRESS...')
    logThis(AEL_LEVEL_INFO, 'CPU USAGE: %s' % runtime.cpu_usage())
    logThis(AEL_LEVEL_INFO, 'MEMORY USAGE: %s' % runtime.memory_usage())
Example #12
def fashionista_register_shutdown_hook():
    apiproxy_stub_map.apiproxy.CancelApiCalls()

    # We can save state to the datastore here or log some statistics.
    logThis(AEL_LEVEL_WARNING, 'SHUTDOWN IN PROGRESS...')
    logThis(AEL_LEVEL_INFO, 'CPU USAGE: %s' % runtime.cpu_usage())
    logThis(AEL_LEVEL_INFO, 'MEMORY USAGE: %s' % runtime.memory_usage())
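A hook like this is normally registered with the App Engine backends runtime so that it runs when the instance is being shut down. A minimal sketch, assuming the standard runtime.set_shutdown_hook API:

from google.appengine.api import runtime

# Register the hook; App Engine calls it when this backend instance is shutting down.
runtime.set_shutdown_hook(fashionista_register_shutdown_hook)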
Example #13
    def post(self):
        tx = self.request.get_range('x', None)
        ty = self.request.get_range('y', None)
        z = self.request.get_range('z', None)
        limit = self.request.get_range('limit', min_value=1, max_value=1000, default=1000)
        offset = self.request.get_range('offset', min_value=0, default=0)
        name = self.request.get('name', None)
        source_name = self.request.get('source', None)

        if tx is None or ty is None or z is None or name is None or source_name is None:
            self.error(400)
            return
        
        key = 'tile-%s-%s-%s-%s-%s' % (z, ty, tx, source_name, name)
        png = cache.get(key)
        if png is None:
            png = get_tile_png(tx, ty, z, name, source_name, limit, offset) 
            if png is None:
                png = PointTile.blank()
            cache.add(key, png, dumps=False)
    
        logging.info('TILE BACKEND MEMORY = %s' % runtime.memory_usage().current())
        self.response.set_status(200)
        self.response.headers['Content-Type'] = 'image/png'
        self.response.out.write(png)            
Example #14
    def end(self, spacename, key, ok=True, msg=''):

        # <editor-fold desc="Verify that a previous init exists; if not, set self.data['init'][spacename][key]['error'] and return False">
        if spacename not in self.data['init'].keys():
            self.data['init'][spacename] = {
                key: {
                    "error":
                    "CorePerformance.end with no previous CorePerformance.init"
                }
            }
            return False

        if key not in self.data['init'][spacename]:
            self.data['init'][spacename][key] = {
                "error":
                "CorePerformance.end with no previous CorePerformance.init"
            }
            return False
        # </editor-fold>

        # <editor-fold desc="Record the memory delta, elapsed time and status in self.data['init'][spacename][key]">
        self.data['init'][spacename][key] = {
            "mem": self.data['init'][spacename][key]['mem'] - memory_usage().current(),
            "time": time.time() - self.data['init'][spacename][key]['time'],
            "ok": ok
        }

        if not ok:
            self.data['init'][spacename][key]['notes'] = msg
Example #15
def _get_employee_info_from_s3():
    from boto import connect_s3
    from boto.s3.key import Key

    logging.info('Reading employees file from S3... {}MB'.format(
        memory_usage().current()))
    key = Key(
        connect_s3(
            aws_access_key_id=get_secret('AWS_ACCESS_KEY_ID'),
            aws_secret_access_key=get_secret('AWS_SECRET_ACCESS_KEY'),
        ).get_bucket(config.S3_BUCKET),
        'employees.json',
    )
    employee_dicts = json.loads(key.get_contents_as_string())
    logging.info('Done reading employees file from S3. {}MB'.format(
        memory_usage().current()))
    return employee_dicts
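Examples #10 (_update_employees) and #15 (_get_employee_info_from_s3) appear to belong to the same sync job, so a plausible (assumed) way they fit together is:

def sync_employees_from_s3():
    # Hypothetical glue code: pull the S3 dump and apply it to the datastore.
    employee_dicts = _get_employee_info_from_s3()
    _update_employees(employee_dicts)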
Example #16
def log(s):
    global tmp_count
    tmp_count = tmp_count + 1
    memory_usage = runtime.memory_usage().current()
    q = Log()
    q.count = tmp_count
    q.txt = str(memory_usage)
    q.log = s
    q.put()
Example #17
def log(s):
    global tmp_count
    tmp_count = tmp_count + 1
    memory_usage = runtime.memory_usage().current()
    q = Log()
    q.count = tmp_count
    q.txt = str(memory_usage)
    q.log = s
    q.put()
Example #18
    def add(self, title, file='', type='all'):

        # Hiding the full path (security)
        file = file.replace(os.path.dirname(os.path.dirname(__file__)), "")

        # Preparing the line to save
        line = ''
        if type == 'note':
            line += "[" + type
        else:
            line += str(self.data['lastIndex']) + " ["

        if len(file):
            file = " (" + file + ")"

        # Calculating memory
        _mem = memory_usage().current() - self.data['lastMemory']
        if type == 'all' or type == 'endnote' or type == 'memory':
            line += str(round(_mem, 3)) + ' Mb'
            self.data['lastMemory'] = memory_usage().current()

        # Calculating elapsed time
        _time = time.time() - self.data['lastMicrotime']
        if type == 'all' or type == 'endnote' or type == 'time':
            line += ', ' + str(round(_time, 3)) + ' secs'
            self.data['lastMicrotime'] = time.time()

        # Adding the title
        line += '] ' + str(title)

        # Adding accumulated data

        if type != 'note':
            line = "[ " + str(round(
                memory_usage().current(), 3)) + " Mb, " + str(
                    round(time.time() - self.data['initMicrotime'],
                          3)) + " secs] / " + line + file

        if type == 'endnote':
            line = "[" + type + "] " + line

        self.data['info'].append(line)
        self.data['lastIndex'] += 1
Example #19
    def init(self, spacename, key):

        # <editor-fold desc="Init self.data['init'][spacename][key]['mem'],['time'],['ok']">
        if spacename not in self.data['init'].keys():
            self.data['init'][spacename] = {}
        self.data['init'][spacename][key] = {
            "mem": memory_usage().current(),
            "time": time.time(),
            "ok": True
        }
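Examples #14 (end) and #19 (init) are the two halves of the same lightweight profiler: init() snapshots the current memory and timestamp under data['init'][spacename][key], and end() overwrites that entry with the memory difference, the elapsed time and the status. A usage sketch with made-up names (the CorePerformance class name is taken from the error strings in Example #14; load_users is hypothetical):

perf = CorePerformance()
perf.init('datastore', 'load_users')   # snapshot memory and time
users = load_users()                   # the work being measured
perf.end('datastore', 'load_users', ok=True)  # store the deltas and status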
Example #20
def gas_update_xls(option="1"):
	xls_result = ResultIter()
	logging.info("comienzo gas_update_xls: %s" %memory_usage().current())
	if option == "0":
		option = sorted(FUEL_OPTIONS.keys())[1:]
		logging.info("Buscando datos de todos los tipos")
	else:
		option = [option]
	for o in option:
		logging.info("Obteniendo %s" %FUEL_OPTIONS[o]["name"])
		response = urlfetch.fetch(URL_XLS+o, deadline=55)
		if response.status_code == 200:
			page = html.document_fromstring(response.content)
			tables = page.xpath("body/table")
			if tables:	# if tables are found in the result
				rows = tables[0].findall("tr")
				if len(rows)<5:
					return None
				for tr in rows[3:]:
					row_data = [td.text for td in tr.getchildren()]
					if row_data[7] == "P":	# keep only gas stations that sell to the public
						date = map(int, row_data[4].split("/"))
						date.reverse();
						xls_result.add_item(
							province = row_data[0],
							town     = row_data[1],
							station  = row_data[2] + " [" + re.sub("\s+", "", row_data[3]) + "]",
							date     = date,
							label    = row_data[6],
							hours    = row_data[9],
							option   = {o: float(re.sub(",", ".", row_data[5]))})
				logging.info("fin procesando %s: %s" %(o, memory_usage().current()))
			else:
				logging.info("sin informacion en %s" %o)
				return None
		else:
			logging.info("Error en el geoportal")
			return None
	return xls_result
Example #21
	def get(self):
		self.setVisInfo(self.request.remote_addr,self.user.nickname(),self.user.email())
		now=time.ctime(time.time())
		self._print('{} - Hello {} ({}) !'.format(now,self.user.nickname(),self.user.email()))
		self._hr()
		self._print("Your IP: "+self.request.remote_addr)
		self._print("Origin URL: "+self.request.url)
		self._print("uid: "+str(os.getuid()))
		self._print("pid: "+str(os.getpid()))
		self._print("cwd: "+os.getcwd())
		self._print(pl.system()+", "+pl.architecture()[0])
		self._print(pl.python_implementation()+", "+pl.python_version())
		self._print("Used mem: "+str(runtime.memory_usage().current())+" MB")
		self._print("Used mem last min: "+str(runtime.memory_usage().average1m())+" MB")
		self._print("CPU usage: "+str(runtime.cpu_usage().total())+" Mcycles")
		self._print("CPU usage last min: "+str(runtime.cpu_usage().rate1m())+" Mcycles")
		self._hr()
		self._form("/primez","de_la","la")
		self._hr()
		self.response.out.write("<a href='"+backends.get_url('primer')+"/backend/primer/mumu'>Primer</a>")
		self._hr()
		self.response.out.write("<a href='/logoff'>LogOut_1</a>")
		self._br()	
		self.response.out.write("<a href='"+users.create_logout_url("/")+"'>LogOut_2</a>")
Example #22
 def __init__(self):
     self.data = OrderedDict()
     self.data['initMicrotime'] = time.time()
     self.data['lastMicrotime'] = self.data['initMicrotime']
     self.data['initMemory'] = memory_usage().current()
     self.data['lastMemory'] = self.data['initMemory']
     self.data['lastIndex'] = 1
     self.data['info'] = []
     self.data['info'].append(
         "File :" +
         __file__.replace(os.path.dirname(os.path.dirname(__file__)), ""))
     self.data['info'].append("Init Memory Usage: " +
                              str(self.data['initMemory']))
     self.data['init'] = OrderedDict()
     self.root_path = os.path.dirname(os.path.dirname(__file__))
Example #23
    def post(self):
        # Parameters checked by frontend
        name = self.request.get('name')
        source_name = self.request.get('source')

        # Compute the cache key for this harvest job up front; it is needed
        # both in the error path below and for the cache lookup.
        key = get_job_cache_key(name, source_name)

        # Get source
        source = sources.get(source_name)
        if not source:
            logging.error('Cannot harvest without a source')
            self.error(404)
            # Update job status to 'error'
            job = get_job(name, source_name, 'error', msg='Unsupported source')
            cache.add(key, job)
            return

        # Check cache for harvest job
        job = cache.get(key)
        if not job:
            self.error(404)
            self.response.headers['Content-Type'] = "application/json"
            self.response.out.write('{"error":"unknown job %s"}' % key)
            return

        count = 0

        # Update job status to 'working'
        cache.add(key, get_job(name, source_name, 'working', msg=count))       
        
        # Get points from source and put them into datastore in batches
        pcount = 0
        for points in self.get_points(name, source):
            logging.info('HARVEST BACKEND MEMORY = %s after %s points' % (runtime.memory_usage().current(), count))
            entities = []
            for p in points:
                pkey = Key('Point', '%s-%s-%s' % (source_name, name, pcount))
                pcount += 1
                entities.append(Point(key=pkey, lat=p[0], lng=p[1]))
                entities.append(PointIndex.create(pkey, p[0], p[1], name, source_name))
            model.put_multi(entities)
            count += len(points)
            cache.add(key, get_job(name, source_name, 'working', msg=count))

        # Update job status to 'done'
        # TODO: Done now or after backend rendering completes?
        cache.add(key, get_job(name, source_name, 'done', msg=count))
Example #24
def runtime_stats():
    mem = {}

    mem_use = runtime.memory_usage()

    mem['Memory (current)'] = mem_use.current()
    mem['Memory (average 1m)'] = mem_use.average1m()
    mem['Memory (average 10m)'] = mem_use.average10m()

    cpu = {}

    cpu_use = runtime.cpu_usage()
    cpu['CPU (Total)'] = cpu_use.total()
    cpu['CPU (Average 1m)'] = cpu_use.rate1m()
    cpu['CPU (Average 10m)'] = cpu_use.rate10m()

    return render_template('runtime.html', cpu=cpu, mem=mem)
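render_template suggests this view belongs to a Flask app. A minimal, assumed wiring (the route path and app object are hypothetical, and a runtime.html template that lists the cpu and mem dicts is expected to exist):

from flask import Flask

app = Flask(__name__)

# Expose the stats page; runtime_stats() renders runtime.html with the dicts built above.
app.add_url_rule('/runtime-stats', view_func=runtime_stats)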
Example #25
def debug(location, message, params=None, force=False):
    if not (settings.REMOTE_DEBUG or settings.LOCALE_DEBUG or force):
        return

    if params is None:
        params = {}

    params["memory"] = runtime.memory_usage().current()
    params["instance_id"] = settings.INSTANCE_ID

    debug_message = "%s/%s?%s" % (urllib2.quote(location), urllib2.quote(message), "&".join(["%s=%s" % (p, urllib2.quote(unicode(params[p]).encode("utf-8"))) for p in params]))

    try:
        if settings.REMOTE_DEBUG or force:
            fetch("%s/%s" % (settings.REMOTE_DEBUGGER, debug_message))
    except:
        pass
        
    if settings.LOCALE_DEBUG or force:
        logging.debug(debug_message)
Example #26
def deferred(request):

    if 'HTTP_X_APPENGINE_TASKNAME' not in request.META:
      logging.critical('Detected an attempted XSRF attack. The header '
                       '"X-AppEngine-Taskname" was not set.')
      return http.HttpResponse(status=403)

    in_prod = (
        not request.environ.get("SERVER_SOFTWARE").startswith("Devel"))
    if in_prod and request.environ.get("REMOTE_ADDR") != "0.1.0.2":
      logging.critical('Detected an attempted XSRF attack. This request did '
                       'not originate from Task Queue.')
      return http.HttpResponse(status=403)

    headers = ["%s:%s" % (k, v) for k, v in request.META.items()
               if k.lower().startswith("x-appengine-")]
    logging.info(", ".join(headers))

    try:
      #logging.info('request3')
      
      raw_post_data = request.read()
      
      logging.info("memory usage: %s",runtime.memory_usage().current())
      
      run(raw_post_data)

      return http.HttpResponse(status=200)
    except SingularTaskFailure:
      logging.debug("Failure executing task, task retry forced")
      return http.HttpResponse(status=408)
    except PermanentTaskFailure, e:
      logging.exception("Permanent failure attempting to execute task")
Example #27
    def _ensure_within_memory_limit(self):
        memory_limit = _MEMORY_LIMIT * 0.8

        memory_usage = runtime.memory_usage().current()
        if memory_usage >= memory_limit:
            # Create a list of candidate counters to remove. We remove counters
            # that have not been modified before those that have been modified,
            # then order them by the last time they were accessed.
            counters = self._store.values()
            counters.sort(
                key=lambda counter: (counter._dirty, counter._last_accessed))
            counters_to_cull = int(math.ceil(len(counters) * _CULL_AMOUNT))
            counters = counters[:counters_to_cull]

            logging.info(
                'Removing %d entries as we are over the memory limit '
                'by %dMB.', counters_to_cull, memory_usage - memory_limit)

            self._write_in_batches(counters)
            for counter in counters:
                del self._store[counter.key().name()]
Example #28
    def _ensure_within_memory_limit(self):
        memory_limit = _MEMORY_LIMIT * 0.8

        memory_usage = runtime.memory_usage().current()
        if memory_usage >= memory_limit:
            # Create a list of candidate counters to remove. We remove counters
            # that have not been modified before those that have been modified,
            # then order them by the last time they were accessed.
            counters = self._store.values()
            counters.sort(key=lambda counter: (counter._dirty,
                                               counter._last_accessed))
            counters_to_cull = int(math.ceil(len(counters) * _CULL_AMOUNT))
            counters = counters[:counters_to_cull]

            logging.info('Removing %d entries as we are over the memory limit '
                         'by %dMB.',
                         counters_to_cull, memory_usage - memory_limit)

            self._write_in_batches(counters)
            for counter in counters:
                del self._store[counter.key().name()]
Example #29
def get_memory():
    if util.dev_server:
        try:
            # This will work in a dev shell, but will raise an error on
            # a dev server.  We convert to MB for consistency with prod.
            #
            # TODO(benkraft): Hack the dev server to allow the import.
            # It prohibits any import that wouldn't be allowed on prod,
            # but here we would actually like to be able to do the
            # import anyway, since we already do things differently on
            # prod.
            #
            # TODO(benkraft): Craig thinks the live runtime API is
            # actually reporting VSS, not RSS, so maybe we should use
            # that for consistency.  Better yet, use both.
            import resource
            return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.
        except:
            return 0
    else:
        # This will work anywhere, but will return 0 on dev.  It involves an RPC.
        return runtime.memory_usage().current()
Example #30
def get_memory():
    if util.dev_server:
        try:
            # This will work in a dev shell, but will raise an error on
            # a dev server.  We convert to MB for consistency with prod.
            #
            # TODO(benkraft): Hack the dev server to allow the import.
            # It prohibits any import that wouldn't be allowed on prod,
            # but here we would actually like to be able to do the
            # import anyway, since we already do things differently on
            # prod.
            #
            # TODO(benkraft): Craig thinks the live runtime API is
            # actually reporting VSS, not RSS, so maybe we should use
            # that for consistency.  Better yet, use both.
            import resource
            return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.
        except:
            return 0
    else:
        # This will work anywhere, but will return 0 on dev.  It involves an RPC.
        return runtime.memory_usage().current()
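Because get_memory() reports megabytes (falling back to 0 on a dev server where neither path works), a typical use is to log a before/after delta around expensive work. A small sketch with hypothetical names:

import logging

before_mb = get_memory()
process_large_report()  # hypothetical expensive call
logging.info('process_large_report used %.1f MB', get_memory() - before_mb)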
Example #31
	def post(self):
		de_la=cgi.escape(self.request.get('de_la'))
		la=cgi.escape(self.request.get('la'))
		rezults=self.getPrimes(de_la,la)
		dur=datetime.datetime.fromtimestamp(int(self.f_time))-datetime.datetime.fromtimestamp(int(self.s_time))
		stats="Last min.: {} Mcycles {} MB Current: {} Mcycles {} MB".format(runtime.cpu_usage().rate1m(),runtime.memory_usage().average1m(),runtime.cpu_usage().total(),runtime.memory_usage().current())
		logging.info("{} {} {} {}\nLast min.: {} Mcycles {} MB\nCurrent: {} {}".format(de_la,la,','.join(rezults),str(dur),runtime.cpu_usage().rate1m(),runtime.memory_usage().average1m(),runtime.cpu_usage().total(),runtime.memory_usage().current()))
		self.setPrimerInfo(de_la,la,','.join(rezults),str(dur),stats)
Example #32
    def createCsv(cls, helper, login_operator_entry=None):
        with_cursor = True
        csv_records = []
        # Title row
        titles = SearchUtils.getCsvTitles(helper)
        csv_records.append(UcfUtil.createCsvRecordEx(titles))

        # Fetch the data list
        q = sateraito_db.SearchList.query()
        q = q.order(sateraito_db.SearchList.search_name)
        logging.info('with_cursor=' + str(with_cursor))

        max_export_cnt = -1
        cnt = 0
        limit = 1000  # Use 1000 so that domains with the usual max_export_cnt == 1000 can be fetched in a single pass
        start_cursor = None
        while True:

            if with_cursor:
                if start_cursor is not None:
                    each_rows, start_cursor, more = q.fetch_page(
                        limit, start_cursor=start_cursor)
                else:
                    each_rows, start_cursor, more = q.fetch_page(limit)
            else:
                each_rows = q.iter(limit=limit, offset=cnt)

            each_cnt = 0
            for entry in each_rows:

                vo = entry.exchangeVo(helper._timezone)
                SearchUtils.editVoForCsv(helper, vo)

                data = []
                data.append('IU')  # command
                data.append(UcfUtil.getHashStr(vo,
                                               'search_name'))  # search_name
                data.append(UcfUtil.getHashStr(
                    vo, 'search_config'))  # search_config

                csv_records.append(UcfUtil.createCsvRecordEx(data))
                each_cnt += 1

                vo = None
                entry = None
                if each_cnt % 100 == 0:
                    gc.collect()

            cnt += each_cnt
            logging.info(cnt)

            # Upper limit on the number of records
            if with_cursor:
                if cnt >= max_export_cnt or not more:
                    break
            else:
                if (max_export_cnt > 0
                        and cnt >= max_export_cnt) or each_cnt < limit:
                    break

        csv_text = '\r\n'.join(csv_records)

        current_memory_usage = runtime.memory_usage().current()
        gc.collect()
        current_memory_usage_after_collect = runtime.memory_usage().current()
        logging.info('current_memory_usage=' + str(current_memory_usage) +
                     ' after_collect=' +
                     str(current_memory_usage_after_collect))

        return csv_text
Example #33
    def post(self):
        try:
            #global global_dict
            #global_dict = {}
            starttime = time.time()
            cutoff_date = datetime.datetime.now() + datetime.timedelta(-365)
            cutoff_date_string = cutoff_date.strftime("%Y-%m-%d %H:%M:%S")

            parts = self.request.body.split("&")
            requests = {}

            if parts is not None and parts[0] != "":
                for pair in parts:
                    ab = pair.split('=')
                    requests[ab[0]] = ab[1]

            force = bool(requests.get("force", False))
            write = bool(requests.get("write", False))
            minwrite = bool(requests.get("minwrite", False))

            rpcList = []
            client = memcache.Client()

            q = structures.Rumble.all()
            rumbles = []
            for r in q.run():
                memr = memcache.get(r.Name)
                if memr is not None:
                    r = memr
                if r.BatchScoresAccurate and not force:
                    continue
                rumbles.append(r)

            for r in rumbles:
                scoresdicts = pickle.loads(
                    zlib.decompress(r.ParticipantsScores))
                entries = len(scoresdicts)
                r.__dict__["entries"] = entries
            rumbles.sort(key=lambda r: -r.__dict__["entries"])

            first = True
            for r in rumbles:
                if not first:
                    time.sleep(5)
                    gc.collect()
                    gc.collect(2)
                first = False

                logging.info("mem usage at start of " + r.Name + ": " +
                             str(runtime.memory_usage().current()) + "MB")
                try:
                    scores = pickle.loads(zlib.decompress(
                        r.ParticipantsScores))
                except:
                    scoresdicts = marshal.loads(
                        zlib.decompress(r.ParticipantsScores))
                    scoreslist = [structures.LiteBot() for _ in scoresdicts]
                    for s, d in zip(scoreslist, scoresdicts):
                        s.__dict__.update(d)
                    scores = {s.Name: s for s in scoreslist}

                if len(scores) == 0:
                    continue

                r.ParticipantsScores = None
                #gc.collect()

                particHash = [p + "|" + r.Name for p in scores]

                particSplit = list_split(particHash, 32)
                ppDict = {}
                for l in particSplit:
                    ppDict.update(memcache.get_multi(l))
                    time.sleep(0.1)

                particSplit = None

                bots = [ppDict.get(h, None) for h in particHash]

                botsdict = {}

                missingHashes = []
                missingIndexes = []
                for i in xrange(len(bots)):
                    if bots[i] is None:
                        missingHashes.append(particHash[i])
                        missingIndexes.append(i)

                    elif isinstance(bots[i], structures.BotEntry):
                        bots[i] = structures.CachedBotEntry(bots[i])

                if len(missingHashes) > 0:
                    bmis = structures.BotEntry.get_by_key_name(missingHashes)

                    #lost = False
                    lostList = []

                    for i in xrange(len(missingHashes)):
                        if bmis[i] is not None:
                            cb = structures.CachedBotEntry(bmis[i])
                            bots[missingIndexes[i]] = cb
                            botsdict[missingHashes[i]] = cb

                        else:
                            bots[missingIndexes[i]] = None
                            lostList.append(missingHashes[i])
                            #lost = True

                while len(particHash) > 0:
                    particHash.pop()
                particHash = None

                while len(missingHashes) > 0:
                    missingHashes.pop()
                missingHashes = None

                while len(missingIndexes) > 0:
                    missingIndexes.pop()
                missingIndexes = None

                logging.info("mem usage after loading bots: " +
                             str(runtime.memory_usage().current()) + "MB")

                bots = filter(lambda b: b is not None, bots)

                get_key = attrgetter("APS")
                bots.sort(key=lambda b: get_key(b), reverse=True)

                gc.collect()

                botIndexes = {}
                for i, b in enumerate(bots):
                    b.Name = b.Name.encode('ascii')
                    intern(b.Name)
                    botIndexes[b.Name] = i
                    b.VoteScore = 0.

                botlen = len(bots)
                APSs = numpy.empty([botlen, botlen])
                APSs.fill(numpy.nan)
                totalAlivePairs = 0
                for i, b in enumerate(bots):
                    try:
                        pairings = pickle.loads(zlib.decompress(
                            b.PairingsList))
                    except:
                        pairsDicts = marshal.loads(
                            zlib.decompress(b.PairingsList))

                        pairings = [structures.ScoreSet() for _ in pairsDicts]
                        for s, d in zip(pairings, pairsDicts):
                            s.__dict__.update(d)
                    removes = []
                    alivePairings = 0
                    for q, p in enumerate(pairings):
                        j = botIndexes.get(p.Name, -1)
                        if j != -1:
                            APSs[j, i] = numpy.float64(p.APS)
                            p.Alive = True
                            alivePairings += 1
                        else:
                            removes.append(q)
                    b.Pairings = alivePairings
                    totalAlivePairs += alivePairings
                    removes.reverse()
                    removed = False
                    for q in removes:
                        p = pairings[q]
                        if p.LastUpload < cutoff_date_string:
                            removed = True
                            pairings.pop(q)
                        else:
                            if p.Alive:
                                removed = True
                            p.Alive = False
                    if removed:
                        b.PairingsList = zlib.compress(
                            pickle.dumps(pairings, -1), 1)

                gc.collect()

                APSs += numpy.float64(100) - APSs.transpose()
                APSs *= numpy.float64(0.5)

                numpy.fill_diagonal(APSs, numpy.nan)

                gc.collect()
                logging.info(
                    str(len(bots)) + " bots loaded, total of " +
                    str(totalAlivePairs) + " alive pairings")
                logging.info("mem usage after unzipping pairings: " +
                             str(runtime.memory_usage().current()) + "MB")

                #Vote
                mins = numpy.nanmax(APSs, 1)
                for i, minimum in enumerate(mins):
                    minIndexes = numpy.argwhere(APSs[i, ...] == minimum)
                    ties = len(minIndexes)
                    if ties > 0:
                        increment = 1. / ties
                        for minIndex in minIndexes:
                            bots[minIndex].VoteScore += increment

                #inv_len = 1.0/botlen
                for b in bots:
                    if b.Pairings > 0:
                        b.VoteScore = 100.0 * b.VoteScore / float(b.Pairings)
                    else:
                        b.VoteScore = 0

                #KNN PBI
                half_k = int(math.ceil(math.sqrt(botlen) / 2))
                KNN_PBI = -numpy.ones((botlen, botlen))
                for i in xrange(len(bots)):
                    low_bound = max([0, i - half_k])
                    high_bound = min([botlen - 1, i + half_k])
                    low_high_bound = min([i + 1, high_bound])
                    before = APSs[:, low_bound:i]
                    after = APSs[:, low_high_bound:high_bound]
                    compare = numpy.hstack((before, after))
                    mm = numpy.mean(numpy.ma.masked_array(
                        compare, numpy.isnan(compare)),
                                    axis=1)
                    KNN_PBI[:, i] = APSs[:, i] - mm.filled(numpy.nan)

    #                a[i] = 0
    #               logging.info("mean error of transpose: " + str(numpy.mean(numpy.square(a))))

    #KNN_PBI[KNN_PBI == numpy.nan] = -1

    #logging.info("mem usage after KNNPBI: " + str(runtime.memory_usage().current()) + "MB")
    # Avg Normalised Pairing Percentage

                mins = numpy.nanmin(APSs, 1)
                maxs = numpy.nanmax(APSs, 1)
                inv_ranges = numpy.float64(1.0) / (maxs - mins)
                NPPs = -numpy.ones((botlen, botlen))
                for i in range(botlen):
                    if numpy.isfinite(inv_ranges[i]):
                        NPPs[i, :] = numpy.float64(100) * (
                            APSs[i, :] - mins[i]) * inv_ranges[i]
                    else:
                        NPPs[i, :] = numpy.float64(100)

                #NPPs[NPPs] = -1

                #logging.info("mem usage after ANPP: " + str(runtime.memory_usage().current()) + "MB")

                changedBots = []  #bots with new pairings since last run

                # save to cache
                botsdict = {}

                for i, b in enumerate(bots):
                    #                try:
                    pairings = pickle.loads(zlib.decompress(b.PairingsList))
                    #                except:
                    #                    pairsDicts = marshal.loads(zlib.decompress(b.PairingsList))
                    #
                    #                    pairings = [structures.ScoreSet() for _ in pairsDicts]
                    #                    for s,d in zip(pairings,pairsDicts):
                    #                        s.__dict__.update(d)
                    nppCount = 0
                    totalNPP = 0.0

                    apsCount = 0
                    totalAPS = 0.0

                    aliveCount = 0

                    changed = False
                    for p in pairings:
                        j = botIndexes.get(p.Name, -1)
                        if j != -1:
                            p.Alive = True
                            changePotential = (p.KNNPBI == 0.0 and p.NPP == -1)

                            aliveCount += 1
                            p.KNNPBI = float(KNN_PBI[j, i])
                            p.NPP = float(NPPs[j, i])

                            if not numpy.isnan(APSs[j, i]):
                                p.APS = float(APSs[j, i])
                                totalAPS += p.APS
                                apsCount += 1

                            if numpy.isnan(p.KNNPBI):
                                p.KNNPBI = 0

                            if numpy.isnan(p.NPP):
                                p.NPP = -1
                            else:
                                totalNPP += p.NPP
                                nppCount += 1

                            if changePotential and p.KNNPBI != 0.0 and p.NPP != -1:
                                changed = True
                        else:
                            p.Alive = False
                            p.KNNPBI = 0
                            p.NPP = -1

                    if nppCount > 0:
                        b.ANPP = float(totalNPP / nppCount)
                    else:
                        b.ANPP = -1.0
                    if apsCount > 0:
                        b.APS = float(totalAPS / apsCount)
                    else:
                        b.APS = -1.0

                    b.PairingsList = zlib.compress(pickle.dumps(pairings, -1),
                                                   1)
                    b.Pairings = aliveCount
                    if b.Pairings > 0:
                        botsdict[b.key_name] = b
                    if changed:
                        changedBots.append(b)

                KNN_PBI = None
                APSs = None
                NPPs = None
                logging.info("mem usage after zipping: " +
                             str(runtime.memory_usage().current()) + "MB")

                gc.collect()
                #logging.info("mem usage after gc: " + str(runtime.memory_usage().current()) + "MB")
                if len(botsdict) > 0:
                    splitlist = dict_split(botsdict, 20)
                    logging.info("split bots into " + str(len(splitlist)) +
                                 " sections")

                    for d in splitlist:
                        rpcList.append(client.set_multi_async(d))
                        time.sleep(.5)  #throttle

                    logging.info("wrote " + str(len(botsdict)) +
                                 " bots to memcache")

                botsdict.clear()
                botsdict = None

                scores = {b.Name: structures.LiteBot(b) for b in bots}

                # bots = None
                r.ParticipantsScores = None
                gc.collect()

                r.ParticipantsScores = db.Blob(
                    zlib.compress(
                        pickle.dumps(scores, pickle.HIGHEST_PROTOCOL), 3))
                logging.info("mem usage after participants zipping: " +
                             str(runtime.memory_usage().current()) + "MB")
                #r.ParticipantsScores = zlib.compress(marshal.dumps([scores[s].__dict__ for s in scores]),4)
                scores = None

                if write:
                    writebots = [None] * len(bots)
                    for i, b in enumerate(bots):
                        putb = structures.BotEntry(key_name=b.key_name)
                        putb.init_from_cache(b)
                        writebots[i] = putb
                    write_lists = list_split(writebots, 50)
                    for subset in write_lists:
                        db.put(subset)
                        time.sleep(0.1)  #throttle
                    logging.info("wrote " + str(len(writebots)) +
                                 " bots to database")

                while len(bots) > 0:
                    bots.pop()
                bots = None

                if minwrite:
                    writebots = [None] * len(changedBots)
                    for i, b in enumerate(changedBots):
                        putb = structures.BotEntry(key_name=b.key_name)
                        putb.init_from_cache(b)
                        writebots[i] = putb
                    write_lists = list_split(writebots, 50)
                    for subset in write_lists:
                        db.put(subset)
                        time.sleep(0.1)
                    logging.info("wrote " + str(len(writebots)) +
                                 " changed bots to database")

                while len(changedBots) > 0:
                    changedBots.pop()
                changedBots = None
                gc.collect()

                if write or minwrite:
                    r.BatchScoresAccurate = True

                rpcList.append(client.set_multi_async({r.Name: r}))

                db.put([r])
                #gc.collect()
                r = None
                logging.info("mem usage after write: " +
                             str(runtime.memory_usage().current()) + "MB")

            for rpc in rpcList:
                rpc.get_result()

            elapsed = time.time() - starttime
            logging.info("Success in " + str(round(1000 * elapsed) / 1000) +
                         "s")
            self.response.out.write("Success in " +
                                    str(round(1000 * elapsed)) + "ms")
        except:
            logging.exception('')
            elapsed = time.time() - starttime
            logging.info("Error in " + str(round(1000 * elapsed) / 1000) + "s")
            self.response.out.write("Error in " + str(round(1000 * elapsed)) +
                                    "ms")
Example #34
    def processOfRequest(self, tenant, token):
        self._approot_path = os.path.dirname(__file__)

        # Stop processing once an error has already occurred (task retry)
        if (int(self.request.headers.environ['HTTP_X_APPENGINE_TASKRETRYCOUNT']
                ) > 1):
            logging.error('error over_1_times')
            return

        data_key = UcfUtil.nvl(self.getRequest('data_key'))
        data_kind = UcfUtil.nvl(self.getRequest('data_kind'))
        login_operator_id = UcfUtil.nvl(self.getRequest('login_operator_id'))
        login_operator_unique_id = UcfUtil.nvl(
            self.getRequest('login_operator_unique_id'))
        login_operator_mail_address = UcfUtil.nvl(
            self.getRequest('login_operator_mail_address'))
        login_operator_client_ip = UcfUtil.nvl(
            self.getRequest('login_operator_client_ip'))

        # Fetch the operator information
        login_operator_entry = None
        if login_operator_unique_id != '':
            login_operator_entry = OperatorUtils.getData(
                self, login_operator_unique_id)
            if login_operator_entry is None:
                raise Exception('Not found login operator information.')

        # preparing blob reader
        # Fetch the binary data stored in blobstore
        blob_key = str(urllib.unquote(self.request.get('key')))
        blob_reader = blobstore.BlobReader(blob_key)

        # Fetch the file data (already created with status=CREATING)
        file_entry = FileUtils.getDataEntryByDataKey(self, data_key)
        if file_entry is None:
            raise Exception(self.getMsg('MSG_NOTFOUND_TARGET_FILE',
                                        (data_key)))

        # Get and update the task token
        last_task_token = file_entry.task_token if file_entry.task_token is not None else ''
        file_entry.task_token = token
        file_entry.put()

        file_vo = file_entry.exchangeVo(self._timezone)
        FileUtils.editVoForSelect(self, file_vo)

        file_encoding = UcfUtil.getHashStr(self.getDeptInfo(True),
                                           'file_encoding')
        if file_encoding == '' or file_encoding == 'SJIS':
            data_encoding = 'cp932'
        elif file_encoding == 'JIS':
            data_encoding = 'jis'
        elif file_encoding == 'EUC':
            data_encoding = 'euc-jp'
        elif file_encoding == 'UTF7':
            data_encoding = 'utf-7'
        elif file_encoding == 'UTF8':
            data_encoding = 'utf-8'
        elif file_encoding == 'UNICODE':
            data_encoding = 'utf-16'
        else:
            data_encoding = 'cp932'

        log_msg = []
        #is_error = False
        record_cnt = 0
        #insert_cnt = 0
        #update_cnt = 0
        #delete_cnt = 0
        #skip_cnt = 0
        #error_cnt = 0

        shutdown_record_cnt_str = self.request.get('shutdown_record_cnt')
        if shutdown_record_cnt_str is not None and shutdown_record_cnt_str != '':
            shutdown_record_cnt = int(shutdown_record_cnt_str)
        else:
            shutdown_record_cnt = 0
        logging.info('shutdown_record_cnt=' + str(shutdown_record_cnt))

        is_error_str = self.request.get('is_error')
        if is_error_str is not None and is_error_str.lower() == 'true':
            is_error = True
        else:
            is_error = False
        logging.info('is_error=' + str(is_error))

        insert_cnt_str = self.request.get('insert_cnt')
        if insert_cnt_str is not None and insert_cnt_str != '':
            insert_cnt = int(insert_cnt_str)
        else:
            insert_cnt = 0
        update_cnt_str = self.request.get('update_cnt')
        if update_cnt_str is not None and update_cnt_str != '':
            update_cnt = int(update_cnt_str)
        else:
            update_cnt = 0
        delete_cnt_str = self.request.get('delete_cnt')
        if delete_cnt_str is not None and delete_cnt_str != '':
            delete_cnt = int(delete_cnt_str)
        else:
            delete_cnt = 0
        skip_cnt_str = self.request.get('skip_cnt')
        if skip_cnt_str is not None and skip_cnt_str != '':
            skip_cnt = int(skip_cnt_str)
        else:
            skip_cnt = 0
        error_cnt_str = self.request.get('error_cnt')
        if error_cnt_str is not None and error_cnt_str != '':
            error_cnt = int(error_cnt_str)
        else:
            error_cnt = 0

        try:

            # If this token has already been processed, this is a retry after GAE force-killed the task, so log it
            if last_task_token == token:
                is_error = True
                log_msg.append(
                    self._formatLogRecord(
                        UcfMessage.getMessage(
                            self.getMsg('MSG_TASK_FORCE_RETRY'))))
                self.updateTaskStatus(file_vo, file_entry, log_msg, is_error,
                                      login_operator_unique_id,
                                      login_operator_id)
                del log_msg[:]

            logging.info('csv_analysis start...')
            new_lines = []
            str_record = ''
            quote_num = 0
            old_lines = blob_reader.read().splitlines()
            for line in old_lines:

                #str_record += lineline + '\n'
                #if str_record.count('"') % 2 == 0:
                #	new_lines.append(str_record.rstrip('\n'))
                #	str_record = ''

                quote_num += line.count('"')
                if quote_num % 2 == 0:
                    new_lines.append(str_record + line)
                    str_record = ''
                    quote_num = 0
                else:
                    str_record += line + '\n'

            logging.info('csv_analysis end. the record count is ' +
                         str(len(new_lines)) + ' with title line.')

            # Support very large CSV files 2015.03.27
            csv.field_size_limit(1000000000)

            # process uploaded csv file
            # handle universal-newline mode
            #csvfile = csv.reader(blob_reader, dialect=csv.excel)
            #csvfile = csv.reader(blob_reader.read().splitlines(), dialect=csv.excel)
            csvfile = csv.reader(new_lines, dialect=csv.excel)

            col_names = []
            for row in csvfile:
                # Process the title row
                if record_cnt == 0:
                    # first row: column list
                    col_index = 0
                    for col in row:
                        # Support CSV files with a BOM 2016.10.13
                        if data_encoding == 'utf-8' and col_index == 0:
                            col = col.decode('utf-8-sig').encode('utf-8')
                        col_name = col.strip().strip('"')
                        # (Condition removed: create a column for every value in the first row)
                        col_names.append(col_name)
                        col_index += 1

                # Process the data rows
                elif shutdown_record_cnt <= record_cnt - 1:

                    is_runtime_shutdown = False
                    is_force_runtime_shutdown = False
                    # Check once every 5 records
                    # whether the instance is shutting down
                    if record_cnt % 5 == 0:
                        is_runtime_shutdown = runtime.is_shutting_down()
                    # The forced split is commented out for now
                    ## Shutdown is often not detected, so split into a separate task every 500 records ⇒ changed to 100 2014.06.12
                    #if (shutdown_record_cnt < record_cnt - 1) and (record_cnt - 1) % 100 == 0:
                    #	is_force_runtime_shutdown = True

                    if is_runtime_shutdown or is_force_runtime_shutdown:
                        is_shutting_down = True
                        current_memory_usage = runtime.memory_usage().current()
                        logging.info('is_shutting_down=' +
                                     str(is_runtime_shutdown) +
                                     ' current_memory_usage=' +
                                     str(current_memory_usage))

                        # instance will be shut down soon!
                        # exit here and kick same batch to start next record
                        logging.info(
                            '***** kicking same batch and stopping: shutdown_record_cnt='
                            + str(record_cnt - 1))
                        # Log a summary
                        log_msg.append(
                            self._formatLogRecord(
                                'development process [record:' +
                                UcfUtil.nvl(record_cnt - 1) + ' skip:' +
                                UcfUtil.nvl(skip_cnt) + ' insert:' +
                                UcfUtil.nvl(insert_cnt) + ' update:' +
                                UcfUtil.nvl(update_cnt) + ' delete:' +
                                UcfUtil.nvl(delete_cnt) + ' error:' +
                                UcfUtil.nvl(error_cnt) + ' ]'))
                        log_msg.append(
                            self._formatLogRecord(
                                'kicking same batch and stopping: shutdown_record_cnt='
                                + str(record_cnt - 1)))
                        self.updateTaskStatus(file_vo, file_entry, log_msg,
                                              is_error,
                                              login_operator_unique_id,
                                              login_operator_id)
                        del log_msg[:]

                        # kick start import
                        import_q = taskqueue.Queue('csv-export-import')
                        params = {
                            'shutdown_record_cnt': record_cnt - 1,
                            'insert_cnt': insert_cnt,
                            'update_cnt': update_cnt,
                            'delete_cnt': delete_cnt,
                            'skip_cnt': skip_cnt,
                            'error_cnt': error_cnt,
                            'is_error': is_error,
                            'key': blob_key,
                            'data_key': data_key,
                            'data_kind': data_kind,
                            'login_operator_id': login_operator_id,
                            'login_operator_unique_id':
                            login_operator_unique_id,
                            'login_operator_mail_address':
                            login_operator_mail_address,
                            'login_operator_client_ip':
                            login_operator_client_ip
                        }

                        import_t = taskqueue.Task(
                            url='/a/' + tenant + '/' + token +
                            '/queue_csv_import',
                            params=params,
                            target=sateraito_func.getBackEndsModuleName(
                                tenant),
                            countdown=1)  # countdown is a number of seconds
                        import_q.add(import_t)
                        return

                    col_index = 0
                    # Build the per-row value mapping for params.
                    csv_record = {}
                    for col_value in row:
                        if col_index < len(col_names):
                            # cut off too much csv data columns
                            # csv_record[col_names[col_index]] = unicode(col_value, UcfConfig.DL_ENCODING).strip().strip('"')
                            # csv_record[col_names[col_index]] = unicode(col_value, data_encoding).strip().strip('"')
                            csv_record[col_names[col_index]] = unicode(
                                col_value, data_encoding)
                            col_index += 1

                    # Process one row
                    deal_type = ''
                    row_log_msg = None
                    code = ''
                    if data_kind == 'importgroupcsv':
                        deal_type, code, row_log_msg = self.importOneRecordGroup(
                            csv_record, record_cnt, blob_key, data_key,
                            data_kind, login_operator_unique_id,
                            login_operator_id, login_operator_mail_address,
                            login_operator_client_ip, login_operator_entry)
                    # elif data_kind == 'importusercsv':
                    #   deal_type, code, row_log_msg = self.importOneRecordUser(csv_record, record_cnt, blob_key, data_key, data_kind, login_operator_unique_id, login_operator_id, login_operator_mail_address, login_operator_client_ip, login_operator_entry)
                    # elif data_kind == 'importchangeuseridcsv':
                    #   deal_type, code, row_log_msg = self.importOneRecordChangeUserID(csv_record, record_cnt, blob_key, data_key, data_kind, login_operator_unique_id, login_operator_id, login_operator_mail_address, login_operator_client_ip, login_operator_entry)

                    # Tally counts and error messages
                    if row_log_msg is not None:
                        log_msg.extend(row_log_msg)
                    if code != '':
                        error_cnt += 1
                        is_error = True
                    if deal_type == UcfConfig.EDIT_TYPE_NEW:
                        insert_cnt += 1
                    elif deal_type == UcfConfig.EDIT_TYPE_RENEW:
                        update_cnt += 1
                    elif deal_type == UcfConfig.EDIT_TYPE_DELETE:
                        delete_cnt += 1
                    elif deal_type == UcfConfig.EDIT_TYPE_SKIP:
                        skip_cnt += 1

                    # User ID change processing is delicate, so log after every record
                    if data_kind == 'importchangeuseridcsv' and log_msg is not None and len(
                            log_msg) > 0:
                        self.updateTaskStatus(file_vo, file_entry, log_msg,
                                              is_error,
                                              login_operator_unique_id,
                                              login_operator_id)
                        del log_msg[:]

                # Free memory from time to time
                if record_cnt % 100 == 0:
                    current_memory_usage = runtime.memory_usage().current()
                    gc.collect()
                    current_memory_usage2 = runtime.memory_usage().current()
                    logging.info('[memory_usage]record=' + str(record_cnt) +
                                 ' before:' + str(current_memory_usage) +
                                 ' after:' + str(current_memory_usage2))
                record_cnt += 1
        except BaseException, e:
            self.outputErrorLog(e)
            log_msg.append(self._formatLogRecord('system error.'))
            is_error = True
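The loop at the start of the try block above rebuilds logical CSV records from physical lines by counting double quotes: lines are buffered until the number of quote characters seen so far is even, so newlines embedded in quoted fields survive. A minimal standalone sketch of that technique follows; the function name and sample data are illustrative, not taken from the original project.

import csv

def reassemble_records(physical_lines):
    # Join physical lines until the running count of '"' characters is even,
    # so that quoted fields containing embedded newlines stay in one record.
    records = []
    buffered = ''
    quote_num = 0
    for line in physical_lines:
        quote_num += line.count('"')
        if quote_num % 2 == 0:
            records.append(buffered + line)
            buffered = ''
            quote_num = 0
        else:
            buffered += line + '\n'
    return records

sample = 'id,comment\n1,"first line\nsecond line"\n2,plain'.splitlines()
for row in csv.reader(reassemble_records(sample), dialect=csv.excel):
    print(row)
# ['id', 'comment'], ['1', 'first line\nsecond line'], ['2', 'plain']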
Ejemplo n.º 35
0
    def post(self):
        try:
            #global global_dict
            #global_dict = {}
            starttime = time.time()
            cutoff_date = datetime.datetime.now() + datetime.timedelta(-365)
            cutoff_date_string = cutoff_date.strftime("%Y-%m-%d %H:%M:%S")
    
            parts = self.request.body.split("&")
            requests = {}
            
            if parts is not None and parts[0] != "":
                for pair in parts:
                    ab = pair.split('=')
                    requests[ab[0]] = ab[1]
            
            force = bool(requests.get("force",False))
            write = bool(requests.get("write",False))
            minwrite = bool(requests.get("minwrite",False))
            
            rpcList = []
            client = memcache.Client()
            
            q = structures.Rumble.all()
            rumbles = []
            for r in q.run():
                memr = memcache.get(r.Name)
                if memr is not None:
                    r = memr
                if r.BatchScoresAccurate and not force:
                    continue
                rumbles.append(r)
                
            for r in rumbles:
                scoresdicts = pickle.loads(zlib.decompress(r.ParticipantsScores))
                entries = len(scoresdicts)
                r.__dict__["entries"] = entries 
            rumbles.sort(key = lambda r: -r.__dict__["entries"])
            
            first = True
            for r in rumbles:
                if not first:
                    time.sleep(5)
                    gc.collect()            
                    gc.collect(2)               
                first = False
                
                logging.info("mem usage at start of " + r.Name + ": " + str(runtime.memory_usage().current()) + "MB")
                try:
                    scores = pickle.loads(zlib.decompress(r.ParticipantsScores))
                except:
                    scoresdicts = marshal.loads(zlib.decompress(r.ParticipantsScores))
                    scoreslist = [structures.LiteBot() for _ in scoresdicts]
                    for s,d in zip(scoreslist,scoresdicts):
                        s.__dict__.update(d)
                    scores = {s.Name:s for s in scoreslist}
                
                if len(scores) == 0:
                    continue
                        
                r.ParticipantsScores = None
                #gc.collect()
    
                particHash = [p + "|" + r.Name for p in scores]
                
                particSplit = list_split(particHash,32)
                ppDict = {}
                for l in particSplit:
                    ppDict.update(memcache.get_multi(l))
                    time.sleep(0.1)
                
                
                particSplit = None
                
                bots = [ppDict.get(h,None) for h in particHash]
                
                botsdict = {}
    
    
                missingHashes = []
                missingIndexes = []
                for i in xrange(len(bots)):
                    if bots[i] is None:
                        missingHashes.append(particHash[i])
                        missingIndexes.append(i)
                    
                    elif isinstance(bots[i],structures.BotEntry):
                        bots[i] = structures.CachedBotEntry(bots[i])
                        
                if len(missingHashes) > 0:
                    bmis = structures.BotEntry.get_by_key_name(missingHashes)
    
                    #lost = False
                    lostList = []
    
                    for i in xrange(len(missingHashes)):
                        if bmis[i] is not None:
                            cb = structures.CachedBotEntry(bmis[i])
                            bots[missingIndexes[i]] = cb
                            botsdict[missingHashes[i]] = cb
                            
                        else:
                            bots[missingIndexes[i]] = None
                            lostList.append(missingHashes[i])
                            #lost = True
    
                while len(particHash) > 0:
                    particHash.pop()    
                particHash = None
                
                while len(missingHashes) > 0:
                    missingHashes.pop()
                missingHashes = None
                
                while len(missingIndexes) > 0:
                    missingIndexes.pop()
                missingIndexes = None
                
                logging.info("mem usage after loading bots: " + str(runtime.memory_usage().current()) + "MB")     
    
                bots = filter(lambda b: b is not None, bots)
                
                get_key = attrgetter("APS")
                bots.sort( key=lambda b: get_key(b), reverse=True)
                
                gc.collect()   
       
                botIndexes = {}
                for i,b in enumerate(bots):
                    b.Name = b.Name.encode('ascii')
                    intern(b.Name)
                    botIndexes[b.Name] = i
                    b.VoteScore = 0.
                
                botlen = len(bots)
                APSs = numpy.empty([botlen,botlen])  
                APSs.fill(numpy.nan)
                totalAlivePairs = 0
                for i,b in enumerate(bots):    
                    try:
                        pairings = pickle.loads(zlib.decompress(b.PairingsList))
                    except:
                        pairsDicts = marshal.loads(zlib.decompress(b.PairingsList))
    
                        pairings = [structures.ScoreSet() for _ in pairsDicts]
                        for s,d in zip(pairings,pairsDicts):
                            s.__dict__.update(d)                
                    removes = []
                    alivePairings = 0
                    for q,p in enumerate(pairings):
                        j = botIndexes.get(p.Name,-1)
                        if j != -1:
                            APSs[j,i] = numpy.float64(p.APS)
                            p.Alive = True
                            alivePairings += 1
                        else:
                            removes.append(q)
                    b.Pairings = alivePairings
                    totalAlivePairs += alivePairings
                    removes.reverse()
                    removed = False
                    for q in removes:
                        p = pairings[q]
                        if p.LastUpload < cutoff_date_string:
                            removed = True
                            pairings.pop(q)
                        else:
                            if p.Alive:
                                removed = True
                            p.Alive = False
                    if removed:
                        b.PairingsList = zlib.compress(pickle.dumps(pairings,-1),1)
                    
                gc.collect()                            
                
                APSs += numpy.float64(100) - APSs.transpose()
                APSs *= numpy.float64(0.5)
                
                numpy.fill_diagonal(APSs, numpy.nan)
                
                gc.collect()
                logging.info(str(len(bots)) + " bots loaded, total of " + str(totalAlivePairs) + " alive pairings")
                logging.info("mem usage after unzipping pairings: " + str(runtime.memory_usage().current()) + "MB")        
                
                #Vote
                mins = numpy.nanmax(APSs,1)
                for i,minimum in enumerate(mins):
                    minIndexes = numpy.argwhere(APSs[i,...] == minimum)
                    ties = len(minIndexes)
                    if ties > 0:
                        increment = 1./ties
                        for minIndex in minIndexes:
                            bots[minIndex].VoteScore += increment
    
                #inv_len = 1.0/botlen
                for b in bots:
                    if b.Pairings > 0:
                        b.VoteScore = 100.0*b.VoteScore/float(b.Pairings)
                    else:
                        b.VoteScore = 0
                    
                #KNN PBI
                half_k = int(math.ceil(math.sqrt(botlen)/2))
                KNN_PBI = -numpy.ones((botlen,botlen))
                for i in xrange(len(bots)):
                    low_bound = max([0,i-half_k])
                    high_bound = min([botlen-1,i+half_k])
                    low_high_bound = min([i+1,high_bound])
                    before = APSs[:,low_bound:i]
                    after = APSs[:,low_high_bound:high_bound]
                    compare = numpy.hstack((before,after))
                    mm = numpy.mean(numpy.ma.masked_array(compare,numpy.isnan(compare)),axis=1)
                    KNN_PBI[:,i] = APSs[:,i] - mm.filled(numpy.nan)
    
    #                a[i] = 0
     #               logging.info("mean error of transpose: " + str(numpy.mean(numpy.square(a))))
                
                #KNN_PBI[KNN_PBI == numpy.nan] = -1
    
                
                #logging.info("mem usage after KNNPBI: " + str(runtime.memory_usage().current()) + "MB")         
                # Avg Normalised Pairing Percentage
                
                mins = numpy.nanmin(APSs,1)            
                maxs = numpy.nanmax(APSs,1)
                inv_ranges = numpy.float64(1.0)/(maxs - mins)
                NPPs = -numpy.ones((botlen,botlen))
                for i in range(botlen):
                    if numpy.isfinite(inv_ranges[i]):
                        NPPs[i,:] = numpy.float64(100)*(APSs[i,:] - mins[i]) * inv_ranges[i]
                    else:
                        NPPs[i,:] = numpy.float64(100)
                
                #NPPs[NPPs] = -1
                
                #logging.info("mem usage after ANPP: " + str(runtime.memory_usage().current()) + "MB")   
                
                changedBots = []#bots with new pairings since last run
                
                # save to cache
                botsdict = {}
                
                for i,b in enumerate(bots):    
    #                try:
                    pairings = pickle.loads(zlib.decompress(b.PairingsList))
    #                except:
    #                    pairsDicts = marshal.loads(zlib.decompress(b.PairingsList))
    #
    #                    pairings = [structures.ScoreSet() for _ in pairsDicts]
    #                    for s,d in zip(pairings,pairsDicts):
    #                        s.__dict__.update(d)                
                    nppCount = 0
                    totalNPP = 0.0
                    
                    apsCount = 0
                    totalAPS = 0.0
                    
                    aliveCount = 0
                    
                    changed = False
                    for p in pairings:
                        j = botIndexes.get(p.Name,-1)
                        if j != -1:
                            p.Alive = True
                            changePotential = (p.KNNPBI == 0.0 and p.NPP == -1 )

                                
                            aliveCount += 1
                            p.KNNPBI = float(KNN_PBI[j,i])
                            p.NPP = float(NPPs[j,i])
    
                            if not numpy.isnan(APSs[j,i]):
                                p.APS = float(APSs[j,i])
                                totalAPS += p.APS
                                apsCount += 1
                                
                            if numpy.isnan(p.KNNPBI):
                                p.KNNPBI = 0
                            
                            if numpy.isnan(p.NPP):
                                p.NPP = -1
                            else:
                                totalNPP += p.NPP
                                nppCount += 1
                                
                            if changePotential and p.KNNPBI != 0.0 and p.NPP != -1 :
                                changed = True
                        else:
                            p.Alive = False
                            p.KNNPBI = 0
                            p.NPP = -1
                        
                    
                    if nppCount > 0:
                        b.ANPP = float(totalNPP/nppCount)
                    else:
                        b.ANPP = -1.0
                    if apsCount > 0:
                        b.APS = float(totalAPS/apsCount)
                    else:
                        b.APS = -1.0
                        
                
    
                    b.PairingsList = zlib.compress(pickle.dumps(pairings,-1),1)
                    b.Pairings = aliveCount
                    if b.Pairings > 0:
                        botsdict[b.key_name] = b
                    if changed:
                        changedBots.append(b)
                
                KNN_PBI = None
                APSs = None
                NPPs = None
                logging.info("mem usage after zipping: " + str(runtime.memory_usage().current()) + "MB")     
    
                gc.collect()
                #logging.info("mem usage after gc: " + str(runtime.memory_usage().current()) + "MB")     
                if len(botsdict) > 0:
                    splitlist = dict_split(botsdict,20)
                    logging.info("split bots into " + str(len(splitlist)) + " sections")
                    
                    for d in splitlist:
                        rpcList.append(client.set_multi_async(d))
                        time.sleep(.5) #throttle
                    
                    logging.info("wrote " + str(len(botsdict)) + " bots to memcache")

                
                
                botsdict.clear()
                botsdict = None
                
                scores = {b.Name: structures.LiteBot(b) for b in bots}
                
               # bots = None
                r.ParticipantsScores = None
                gc.collect()
                
                r.ParticipantsScores = db.Blob(zlib.compress(pickle.dumps(scores,pickle.HIGHEST_PROTOCOL),3))
                logging.info("mem usage after participants zipping: " + str(runtime.memory_usage().current()) + "MB")     
                #r.ParticipantsScores = zlib.compress(marshal.dumps([scores[s].__dict__ for s in scores]),4)
                scores = None
                
                if write:
                    writebots = [None]*len(bots)
                    for i,b in enumerate(bots):
                        putb = structures.BotEntry(key_name = b.key_name)
                        putb.init_from_cache(b)
                        writebots[i] = putb
                    write_lists = list_split(writebots,50)
                    for subset in write_lists:                    
                        db.put(subset)
                        time.sleep(0.1)#throttle
                    logging.info("wrote " + str(len(writebots)) + " bots to database")
                
                while len(bots) > 0:
                    bots.pop()
                bots = None
                
                if minwrite:
                    writebots = [None]*len(changedBots)
                    for i,b in enumerate(changedBots):
                        putb = structures.BotEntry(key_name = b.key_name)
                        putb.init_from_cache(b)
                        writebots[i] = putb
                    write_lists = list_split(writebots,50)
                    for subset in write_lists:                    
                        db.put(subset)
                        time.sleep(0.1)
                    logging.info("wrote " + str(len(writebots)) + " changed bots to database")
                
                while len(changedBots) > 0:
                    changedBots.pop()
                changedBots = None
                gc.collect()
                
                if write or minwrite:
                    r.BatchScoresAccurate = True
                
                rpcList.append(client.set_multi_async({r.Name:r}))
                
                db.put([r])
                #gc.collect()
                r = None
                logging.info("mem usage after write: " + str(runtime.memory_usage().current()) + "MB")     
                
                    
            for rpc in rpcList:
                rpc.get_result()
                
            elapsed = time.time() - starttime    
            logging.info("Success in " + str(round(1000*elapsed)/1000) + "s")
            self.response.out.write("Success in " + str(round(1000*elapsed)) + "ms")
        except:
            logging.exception('')
            elapsed = time.time() - starttime   
            logging.info("Error in " + str(round(1000*elapsed)/1000) + "s")
            self.response.out.write("Error in " + str(round(1000*elapsed)) + "ms")
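The rumble-rebuild handler above calls list_split and dict_split helpers that are not included in the snippet. A minimal sketch of what such chunking helpers might look like, assuming they simply split a list or a dict into pieces of at most n elements (hypothetical implementations matching how the calls are used above):

def list_split(items, n):
    # Split a list into consecutive chunks of at most n elements.
    return [items[i:i + n] for i in range(0, len(items), n)]

def dict_split(d, n):
    # Split a dict into a list of smaller dicts with at most n entries each.
    items = list(d.items())
    return [dict(items[i:i + n]) for i in range(0, len(items), n)]

assert list_split([1, 2, 3, 4, 5], 2) == [[1, 2], [3, 4], [5]]
assert len(dict_split({'a': 1, 'b': 2, 'c': 3}, 2)) == 2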
Ejemplo n.º 36
0
    def get(self):
        repeat = self.request.get("repeat", 5)
        iter = self.request.get("iter", 100)
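        # Note: 'repeat' and 'iter' are read from the request but unused; the timeit calls below hard-code number=100, repeat=4.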

        self.response.headers['Content-Type'] = 'text/plain'
        
        stmt1 = """\
        import pytz
        from datetime import datetime
        tz_strings = ('Australia/Perth', 'Australia/Melbourne', 'Europe/London',
                      'America/Indiana/Indianapolis')
        for tz in tz_strings:
            dt = datetime(2009, 4, 15)
            pytz.timezone(tz)
        """
        
        stmt2 = """\
        import spytz3
        from datetime import datetime
        tz_strings = ('Australia/Perth', 'Australia/Melbourne', 'Europe/London',
                      'America/Indiana/Indianapolis')
        for tz in tz_strings:
            dt = datetime(2009, 4, 15)
            spytz3.timezone(tz)
        """

        stmt3 = """\
        import pytz
        from datetime import datetime
        tz_strings = ('Australia/Perth', 'Australia/Melbourne', 'Europe/London',
                      'America/Indiana/Indianapolis')
        for tz in tz_strings:
            dt = datetime(2009, 4, 15)
            pytz.timezone(tz)
        pytz.clear_cache()
        """
        
        stmt4 = """\
        import spytz3
        from datetime import datetime
        tz_strings = ('Australia/Perth', 'Australia/Melbourne', 'Europe/London',
                      'America/Indiana/Indianapolis')
        for tz in tz_strings:
            dt = datetime(2009, 4, 15)
            spytz3.timezone(tz)
        spytz3.clear_cache()
        """
        
        gc.collect()
        self.response.write("-- cache --\n")

        mem_st = runtime.memory_usage().current()
        cpu_st = runtime.cpu_usage().total()
        t1 = timeit.repeat(stmt=stmt1, number=100, repeat=4)
        mem = runtime.memory_usage().current() - mem_st
        cpu = runtime.cpu_usage().total() - cpu_st
        self.response.write("PYTZ    cpu:{}, memory: {}\n".format(cpu, mem))
        self.response.write("timeit: {}\n".format(t1))

        mem_st = runtime.memory_usage().current()
        cpu_st = runtime.cpu_usage().total()
        gc.collect()
        time.sleep(1)
        mem = runtime.memory_usage().current() - mem_st
        cpu = runtime.cpu_usage().total() - cpu_st
        self.response.write("SLEEP   cpu:{}, memory: {}\n".format(cpu, mem))

        mem_st = runtime.memory_usage().current()
        cpu_st = runtime.cpu_usage().total()
        t2 = timeit.repeat(stmt=stmt2, number=100, repeat=4)
        mem = runtime.memory_usage().current() - mem_st
        cpu = runtime.cpu_usage().total() - cpu_st
        self.response.write("SPYTZ   cpu:{}, memory: {}\n".format(cpu, mem))
        self.response.write("timeit: {}\n".format(t2))

        self.response.write("\n")
        self.response.write("-- clear cache --\n")

        mem_st = runtime.memory_usage().current()
        cpu_st = runtime.cpu_usage().total()
        gc.collect()
        time.sleep(1)
        mem = runtime.memory_usage().current() - mem_st
        cpu = runtime.cpu_usage().total() - cpu_st
        self.response.write("SLEEP   cpu:{}, memory: {}\n".format(cpu, mem))

        mem_st = runtime.memory_usage().current()
        cpu_st = runtime.cpu_usage().total()
        t3 = timeit.repeat(stmt=stmt3, number=100, repeat=4)
        mem = runtime.memory_usage().current() - mem_st
        cpu = runtime.cpu_usage().total() - cpu_st
        self.response.write("PYTZ    cpu:{}, memory: {}\n".format(cpu, mem))
        self.response.write("timeit: {}\n".format(t3))

        mem_st = runtime.memory_usage().current()
        cpu_st = runtime.cpu_usage().total()
        gc.collect()
        time.sleep(1)
        mem = runtime.memory_usage().current() - mem_st
        cpu = runtime.cpu_usage().total() - cpu_st
        self.response.write("SLEEP   cpu:{}, memory: {}\n".format(cpu, mem))

        mem_st = runtime.memory_usage().current()
        cpu_st = runtime.cpu_usage().total()
        t4 = timeit.repeat(stmt=stmt4, number=100, repeat=4)
        mem = runtime.memory_usage().current() - mem_st
        cpu = runtime.cpu_usage().total() - cpu_st
        self.response.write("SPYTZ   cpu:{}, memory: {}\n".format(cpu, mem))
        self.response.write("timeit: {}\n".format(t4))

        mem_st = runtime.memory_usage().current()
        cpu_st = runtime.cpu_usage().total()
        gc.collect()
        time.sleep(1)
        mem = runtime.memory_usage().current() - mem_st
        cpu = runtime.cpu_usage().total() - cpu_st
        self.response.write("SLEEP   cpu:{}, memory: {}\n".format(cpu, mem))
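The benchmark above samples runtime.memory_usage() and runtime.cpu_usage() before and after each timeit run and reports the deltas. A rough local stand-in for that delta pattern, using only the standard library (resource is Unix-only and is not the App Engine API; the statement and counts are illustrative):

import resource
import timeit

stmt = "sorted(range(1000), reverse=True)"

mem_st = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
cpu_st = resource.getrusage(resource.RUSAGE_SELF).ru_utime
timings = timeit.repeat(stmt=stmt, number=100, repeat=4)
mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - mem_st
cpu = resource.getrusage(resource.RUSAGE_SELF).ru_utime - cpu_st

print("timeit: %s" % timings)
print("cpu: %s, max RSS delta: %s" % (cpu, mem))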
Ejemplo n.º 37
0
def data2store(data):
	if not data:
		logging.info("NO HAY DATOS QUE GUARDAR")
		return
	cachedata = json.loads(getGasole().decode('zlib'))
	if "_meta" in cachedata:		# compatibilidad con la api antigua
		cachedata = cachedata.get("_data")

	for p in data: # iterate over the provinces
		_provinces = []		# new provinces
		_towns = []			# new towns
		_stations = []		# new gas stations
		# _prices = []		# new or updated prices
		_history = []		# new history records (as many as _prices)
		_closed = []		# closed stations
		# _del_prices = []	# current prices to delete
		datap = data.get(p)
		cachep = cachedata.get(p)
		if not cachep: # new province
			cachep = {}
			_provinces.append(Province(key_name=p))
		for t in datap: # iterate over the towns
			datat = datap[t]
			cachet = cachep.get(t)
			if not cachet:	# new town
				cachet = cachep[t] = {}
				_towns.append(Town(key_name=t, parent=db.Key.from_path('Province', p)))
			for s in datat: # iterate over the stations
				datas = datat[s]
				caches = cachet.get(s)
				update_price = False
				if not caches: # new station
					_stations.append(GasStation(
						key_name = s,
						parent = db.Key.from_path('Province', p, 'Town', t),
						label = datas["l"],
						hours = datas["h"],
						closed = False))
					update_price = True
				else:
					geopt = caches.get("g")
					if geopt:
						datas["g"]=geopt
					if caches["d"]!=datas["d"]: # distinta fecha
						update_price = True
					del cachet[s]				# remove it from the cache: closed-station detection
				if update_price:
					parent_key = db.Key.from_path('Province', p, 'Town', t, 'GasStation', s)
					date = Date(*datas["d"])
					props = dict((FUEL_OPTIONS[o]["short"], datas["o"][o]) for o in datas["o"])
					# _prices.append(PriceData(key_name=s, parent=parent_key, date=date, **props))
					_history.append(HistoryData(parent=parent_key, date=date, **props))
			if len(cachet)==0: 	# no stations left; speeds up the closed-station search
				del cachep[t]	# remove the town from the cache
		# Closed stations: whatever remains in cachep:
		for t in cachep:
			for s in cachep[t]:
				caches = cachep[t][s]
				_closed.append(GasStation(
					key_name = s,
					parent = db.Key.from_path('Province', p, 'Town', t),
					label = caches["l"],
					hours = caches["h"],
					closed = True))
				# _del_prices.append(db.Key.from_path('Province', p, 'Town', t, 'GasStation', s, 'PriceData', s))
		newdata = _provinces+_towns+_stations+_history+_closed #+_prices
		if len(newdata):
			try:
				logging.info("==========Guardando datos de %s" %p)
				if len(_towns):
					logging.info("%s nuevas ciudades" %len(_towns))
				if len(_stations):
					logging.info("%s nuevas estaciones" %len(_stations))
				# if len(_prices):
				# 	logging.info("%s nuevos precios" %len(_prices))
				if len(_history):
					logging.info("%s históricos" %len(_history))
				if len(_closed):
					logging.info("%s estaciones CERRADAS" %len(_closed))
				# if len(_del_prices):
				# 	logging.info("%s precios BORRADOS" %len(_del_prices))
				updateDB(dnew=newdata)
				# json_data = json.dumps({"_data": {p: datap}})
				# ApiJson(key_name=p, json=json_data).put()
				logging.info("Uso de memoria: %s" %memory_usage().current())
			except Exception, e:
				logging.error("*************** No se han podido guardar los datos de %s" %p)
				logging.error(str(e))
				return
		del newdata
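data2store above detects closed stations by deleting every matched key from the cached structure, so whatever remains afterwards is stale. A minimal sketch of that diff-against-cache idea with flat, illustrative data shapes (the real code works on nested province/town/station dicts and datastore entities):

def diff_against_cache(new_data, cached):
    # Pop every key present in the new data off a copy of the cache;
    # whatever is left over was not seen again and is treated as closed.
    cached = dict(cached)
    added, updated = [], []
    for key, item in new_data.items():
        old = cached.pop(key, None)
        if old is None:
            added.append(key)
        elif old["d"] != item["d"]:  # a different date means the price changed
            updated.append(key)
    closed = sorted(cached)
    return added, updated, closed

print(diff_against_cache({"s1": {"d": 2}, "s3": {"d": 1}},
                         {"s1": {"d": 1}, "s2": {"d": 1}}))
# (['s3'], ['s1'], ['s2'])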