Example #1
    def testRecordAPIs(self):
        self.sensor1 = Sensor.Create(self.e, "000-100", self.st.key().id())
        self.sensor1.put()
        now = datetime.now()
        r1_ts = tools.unixtime(now)
        r = Record.Create(tools.unixtime(now), self.sensor1,
                          {'location': '51.5033640,-0.1276250'})
        r2 = Record.Create(
            tools.unixtime(now) + 1000, self.sensor1,
            {'location': '51.5033640,-0.1276250'})
        db.put([r, r2])

        # Test list
        params = self.__commonParams()
        params.update({'sensor_kn': "000-100"})
        result = self.get_json("/api/data", params)
        self.assertTrue(result['success'])
        self.assertEqual(len(result['data']['records']), 2)

        # Test detail
        params = self.__commonParams()
        result = self.get_json("/api/data/%s/%s" % ("000-100", r1_ts), params)
        self.assertTrue(result['success'])
        _r = result['data']['record']
        self.assertEqual(_r['sensor_kn'], "000-100")
        self.assertEqual(_r['ts'], r1_ts)
        self.assertEqual(
            _r['kn'], "%s_%s_%s" %
            (self.e.key().id(), self.sensor1.key().name(), int(r1_ts)))
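
Every example on this page calls tools.unixtime, whose source is not shown here. As a reading aid, here is a minimal sketch consistent with the call sites in these examples (dt=None meaning "now", milliseconds by default, ms=False for seconds, local=False for UTC); this is an assumption, not the project's actual implementation:

import calendar
import time
from datetime import datetime

def unixtime(dt=None, ms=True, local=True):
    # Hypothetical sketch of the helper used throughout these examples:
    # epoch timestamp of dt (or of "now" when dt is None), in
    # milliseconds by default, in seconds when ms=False.
    if dt is None:
        dt = datetime.now()
    if local:
        seconds = time.mktime(dt.timetuple())
    else:
        seconds = calendar.timegm(dt.timetuple())  # treat dt as UTC
    seconds += dt.microsecond / 1e6
    return seconds * 1000 if ms else seconds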
Example #2
    def testGeoJsonIn(self):
        uri = "/%s/inbox/json/%s" % (self.e.key().id(), TEST_SENSOR_ID)
        lat = 1.3
        lon = 36.9
        MOVE_SIZE = 0.01
        MAX_ACCEL = 10
        N_POINTS = 10
        DELAY_SECS = 1
        now = datetime.now() - timedelta(seconds=60)

        # Populate dummy data with random moves
        data = []
        target_accel_mags = []
        for x in range(N_POINTS):
            now += timedelta(seconds=DELAY_SECS)
            lat += (random.random() - 0.5) * MOVE_SIZE
            lon += (random.random() - 0.5) * MOVE_SIZE
            loc = "%s,%s" % (lat, lon)
            ax = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            ay = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            az = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            accel_mag = math.sqrt(pow(ax, 2) + pow(ay, 2) + pow(az, 2))
            target_accel_mags.append(accel_mag)
            data.append({
                'timestamp': tools.unixtime(dt=now),  # milliseconds
                'location': loc,
                'ax': ax,
                'ay': ay,
                'az': az
            })
        last_loc = loc
        body = json.dumps(data)
        response = self.post(uri, body)
        self.assertEqual(response.status_int, 200)
        content = json.loads(response.normal_body)
        self.assertTrue(content['success'])
        self.assertEqual(content['data']['count'], N_POINTS)

        # Fetch created records from db
        records = Record.Fetch(self.geosensor1)
        self.assertEqual(len(records), N_POINTS)
        last_r = records[0]
        self.assertEqual(tools.unixtime(last_r.dt_recorded),
                         tools.unixtime(now))

        accel_mags = [r.columnValue('accel_mag') for r in records]
        self.assertListEqual(accel_mags, list(reversed(target_accel_mags)))

        # Confirm sensor state update
        self.geosensor1 = Sensor.get(self.geosensor1.key())  # Refetch from db
        self.assertEqual(self.geosensor1.location, db.GeoPt(last_loc))
Example #3
    def testGeoJsonIn(self):
        uri = "/%s/inbox/json/%s" % (self.e.key().id(), TEST_SENSOR_ID)
        lat = 1.3
        lon = 36.9
        MOVE_SIZE = 0.01
        MAX_ACCEL = 10
        N_POINTS = 10
        DELAY_SECS = 1
        now = datetime.now()

        # Populate dummy data with random moves
        data = []
        target_accel_mags = []
        for x in range(N_POINTS):
            now += timedelta(seconds=DELAY_SECS)
            lat += (random.random() - 0.5) * MOVE_SIZE
            lon += (random.random() - 0.5) * MOVE_SIZE
            loc = "%s,%s" % (lat, lon)
            ax = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            ay = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            az = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            accel_mag = math.sqrt(pow(ax, 2) + pow(ay, 2) + pow(az, 2))
            target_accel_mags.append(accel_mag)
            data.append({
                'timestamp': tools.unixtime(dt=now),  # milliseconds
                'location': loc,
                'ax': ax,
                'ay': ay,
                'az': az
            })
        last_loc = loc
        body = json.dumps(data)
        response = self.post(uri, body)
        self.assertEqual(response.status_int, 200)
        content = json.loads(response.normal_body)
        self.assertTrue(content['success'])
        self.assertEqual(content['data']['count'], N_POINTS)

        # Fetch created records from db
        records = Record.Fetch(self.geosensor1)
        self.assertEqual(len(records), N_POINTS)
        last_r = records[0]
        self.assertEqual(tools.unixtime(last_r.dt_recorded), tools.unixtime(now))

        accel_mags = [r.columnValue('accel_mag') for r in records]
        self.assertListEqual(accel_mags, list(reversed(target_accel_mags)))

        # Confirm sensor state update
        self.geosensor1 = Sensor.get(self.geosensor1.key())  # Refetch from db
        self.assertEqual(self.geosensor1.location, db.GeoPt(last_loc))
Example #4
    def __init__(self, rkey, start_att="__key__", start_att_direction=""):
        self.report = Report.get(rkey)

        if not self.report:
            logging.error("Error retrieving report [ %s ] from db" % rkey)
            return
        self.report.status = REPORT.GENERATING
        self.report.put()

        self.counters = {'run': 0, 'skipped': 0}
        self.worker_start = tools.unixtime()
        self.cursor = None
        self.start_att = start_att
        self.start_att_direction = start_att_direction
        self.worker_cancelled = False
        self.prefetch_props = []
        self.date_columns = []
        self.headers = []
        self.date_att = None
        self.projection = None
        self.query = None
        self.batch_size = 300
        self.report_prog_mckey = MC_EXPORT_STATUS % self.report.key()
        self.setProgress({'val': 0, "status": REPORT.GENERATING})
        self.gcs_file = gcs.open(self.getGCSFilename(), 'w')
        self.setup()

        # From: https://code.google.com/p/googleappengine/issues/detail?id=8809
        logservice.AUTOFLUSH_ENABLED = True
        logservice.AUTOFLUSH_EVERY_BYTES = None
        logservice.AUTOFLUSH_EVERY_SECONDS = 1
        logservice.AUTOFLUSH_EVERY_BYTES = 1024
        logservice.AUTOFLUSH_EVERY_LINES = 1
Example #5
 def last_deactivation_ts(self, rule_index):
     alarms = self.recent_alarms[rule_index]
     if alarms:
         last_alarm = alarms[0]
         if last_alarm:
             return tools.unixtime(last_alarm.dt_end)
     return None
Example #6
 def __createNewRecords(self,
                        data,
                        first_dt=None,
                        interval_secs=3,
                        sensor=None):
     if not sensor:
         sensor = self.vehicle_1
     now = first_dt if first_dt else datetime.now()
     records = []
     N = len(data.values()[0])
     for i in range(N):
         _r = {}
         for column, vals in data.items():
             _r[column] = vals[i]
         now += timedelta(seconds=interval_secs)
         r = Record.Create(tools.unixtime(now),
                           sensor,
                           _r,
                           allow_future=True)
         records.append(r)
     db.put(records)
     sensor.dt_updated = datetime.now()
     sensor.put()
     logging.debug("Created %d records" % len(records))
     if records:
         return records[-1].dt_recorded  # Datetime of last record created
     else:
         return None
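
The helper above expects column-oriented data: a dict mapping each column name to an equal-length list of values, producing one record per index. A hypothetical call from inside another test method (column names invented for illustration):

     # Three records, 5 seconds apart, on the default sensor
     last_dt = self.__createNewRecords({
         'speed': [10, 20, 30],
         'bearing': [0.1, 0.2, 0.4],
     }, interval_secs=5)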
Example #7
    def writeData(self):
        total_i = self.counters['run']
        while True:
            self.query = self._get_gql_query()
            if self.query:
                entities, self.cursor, more = self.KIND.gql(self.query).fetch_page(self.batch_size, start_cursor=self.cursor)
                if not entities:
                    logging.debug("No rows returned by query -- done")
                    return
                else:
                    logging.debug("Got %d rows" % len(entities))
                for entity in entities:
                    if entity:
                        ed = self.entityData(entity)
                    else:
                        continue
                    if self.report.ftype == REPORT.CSV:
                        csv.writer(self.gcs_file).writerow(tools.normalize_list_to_ascii(ed))
                    self.gcs_file.flush()

                    total_i += 1
                    self.counters['run'] += 1
                    if total_i % 100 == 0:
                        cancelled = self.updateProgressAndCheckIfCancelled()
                        if cancelled:
                            self.report.CleanDelete()
                            logging.debug("Worker cancelled by user, report deleted.")
                            return

                logging.debug("Batch of %d done" % len(entities))
                elapsed_ms = tools.unixtime() - self.worker_start
                elapsed = elapsed_ms / 1000
                if elapsed >= MAX_REQUEST_SECONDS or (tools.on_dev_server() and TEST_TOO_LONG_ON_EVERY_BATCH):
                    logging.debug("Elapsed %ss" % elapsed)
                    raise TooLongError()
Example #8
 def __createNewRecords(self,
                        data,
                        first_dt=None,
                        interval_secs=3,
                        sensor=None):
     if not sensor:
         sensor = self.vehicle_1
     now = first_dt if first_dt else datetime.now()
     records = []
     N = len(data.values()[0])
     for i in range(N):
         _r = {}
         for column, vals in data.items():
             _r[column] = vals[i]
         if 'ts' in data:
             # If ts passed in record, overrides
             now = util.ts_to_dt(data['ts'])
         else:
             now += timedelta(seconds=interval_secs)
         r = Record.Create(tools.unixtime(now),
                           sensor,
                           _r,
                           allow_future=True)
         records.append(r)
     db.put(records)
     sensor.dt_updated = datetime.now()
     sensor.put()
     logging.debug("Created %d records" % len(records))
     return records[-1]
Example #9
    def run(self, start_cursor=None):
        self.worker_start = tools.unixtime()
        self.cursor = start_cursor

        if not start_cursor:
            self.writeHeaders()

        try:
            # This is heavy
            self.writeData()
        except TooLongError:
            logging.debug("TooLongError: Going to the next batch")
            if self.report:
                self.finish(reportDone=False)
                tools.safe_add_task(self.run,
                                    start_cursor=self._get_cursor(),
                                    _queue="report-queue")
        except Exception, e:  # including DeadlineExceededError
            traceback.print_exc()
            logging.error("Error: %s" % e)
            self.setProgress({
                'error': "Error occurred: %s" % e,
                'status': REPORT.ERROR
            })
            return
Example #10
def update_article(access_token, item_id, action='favorite'):
    '''
    Favorite or archive (mark read) an article
    '''
    actions = json.dumps(
        [
            {
                "action": action,
                "item_id": item_id,
                "time": str(int(tools.unixtime(ms=False)))
            }
        ]
    )
    data = urllib.urlencode({
        'access_token': access_token,
        'consumer_key': POCKET_CONSUMER_KEY,
        'actions': actions
    })
    logging.debug(data)
    res = urlfetch.fetch(
        url=MODIFY_ENDPOINT + "?" + data,
        method=urlfetch.GET,
        validate_certificate=True)
    logging.debug(res.content)
    if res.status_code == 200:
        result = json.loads(res.content)
        ok = result.get('status', 0) == 1
        return ok
    else:
        logging.debug(res.headers)
    return False
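
A hypothetical call, assuming a previously obtained Pocket access token (both values below are placeholders); the function returns True only when Pocket reports status == 1:

access_token = "<pocket-access-token>"
ok = update_article(access_token, "1576987151", action='archive')
if not ok:
    logging.warning("Pocket update failed")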
Example #11
 def run(self):
     self.start = datetime.now()
     self.setup()
     logging.debug("Starting run %s" % self)
     try:
         while True:
             batch = self.fetchBatch()
             if batch:
                 self.runBatch(batch)
                 self.checkDeadline()
             else:
                 self.finish()
                 break
     except (TooLongError, DeadlineExceededError):
         logging.debug("Deadline expired, creating new request... Records: %s, Continuations: %s, Last record: %s" % (self.records_processed, self.continuations, self.last_record))
         self.continuations += 1
         task_name = self.sensorprocess.process_task_name(subset="cont_%s" % tools.unixtime())
         tools.safe_add_task(self.run, _name=task_name, _queue="processing-queue-new")
     except (Shutdown):
         logging.debug("Finishing because instance shutdown...")
         self.finish(result=PROCESS.ERROR, narrative="Instance shutdown")
     except Exception, e:
         logging.error("Uncaught error: %s" % e)
         traceback.print_exc()
         self.finish(result=PROCESS.ERROR, narrative="Processing Error: %s" % e)
Example #12
    def run(self, start_cursor=None):
        self.worker_start = tools.unixtime()
        if self.has_section_files() and len(self.section_gcs_files) != len(self.repeat_sections):
            for section_name, section_questions in self.repeat_sections:
                self.section_gcs_files.append(gcs.open(self.getGCSFilename(suffix=section_name), 'w'))

        self.cursor = start_cursor
        self.setProgress({'max': self.count(), 'report': self.report.json()})
        
        if not start_cursor:
            self.writeHeaders()
        
        try:
            # This is heavy
            self.writeData()
        except TooLongError:
            logging.debug("TooLongError: Going to the next batch")
            if self.report:
                self.finish(reportDone=False)
                tools.safe_add_task(self.run, start_cursor=self._get_cursor(), _queue="worker-queue")
        except Exception, e:  # including DeadlineExceededError
            traceback.print_exc()
            logging.error("Error: %s" % e)
            self.setProgress({'error': "Error occurred: %s" % e, 'status': REPORT.ERROR})
            return
Example #13
    def testNonSaneFutureRecords(self):
        uri = "/%s/inbox/json/%s" % (self.e.key().id(), TEST_SENSOR_ID)
        lat = 1.3
        lon = 36.9
        MOVE_SIZE = 0.01
        N_POINTS = 10
        DELAY_SECS = 1
        now = datetime.now()

        # Populate dummy data with random moves
        data = []
        for x in range(N_POINTS):
            now += timedelta(seconds=DELAY_SECS)
            lat += (random.random() - 0.5) * MOVE_SIZE
            lon += (random.random() - 0.5) * MOVE_SIZE
            loc = "%s,%s" % (lat, lon)
            data.append({
                'timestamp': tools.unixtime(dt=now) + 1000 * 60 * 60 * 24 * 30,  # Non-sane (1 month in the future)
                'location': loc
            })
        last_loc = loc
        body = json.dumps(data)
        response = self.post(uri, body)
        self.assertEqual(response.status_int, 200)
        content = json.loads(response.normal_body)
        self.assertTrue(content['success'])
        self.assertEqual(content['data']['count'], N_POINTS)

        # Fetch created records from db
        records = Record.Fetch(self.geosensor1)
        self.assertEqual(len(records), 0)  # No records saved, all non-sane
Example #14
 def finish(self, reportDone=True):
     """Called when the worker has finished, to allow for any final work to be done."""
     progress = None
     if reportDone:
         self.gcs_file.close()
         self.report.status = REPORT.DONE
         self.report.dt_generated = datetime.now()
         self.report.put()
         duration = self.report.get_duration()
         logging.debug("GCSReportWorker finished. Counters: %s. Report ran for %d seconds." % (self.counters, duration))
         progress = {
             "status": REPORT.DONE,
             "resource": self.report.get_gcs_file(),
             "generated": tools.unixtime(dt=self.report.dt_generated),
             "report": self.report.json(),
             "duration": duration
         }
     else:
         logging.debug("Batch finished. Counters: %s" % (self.counters))
     p = {
         'val': self.counters['run'],
         "filename": self.report.title
     }
     if progress:
         p.update(progress)
     self.setProgress(p)
     gc.collect()  # Garbage collector
Example #15
def update_article(access_token, item_id, action='favorite'):
    '''
    Favorite or archive (mark read) an article
    '''
    actions = json.dumps(
        [
            {
                "action": action,
                "item_id": item_id,
                "time": str(int(tools.unixtime(ms=False)))
            }
        ]
    )
    data = urllib.urlencode({
        'access_token': access_token,
        'consumer_key': POCKET_CONSUMER_KEY,
        'actions': actions
    })
    logging.debug(data)
    res = urlfetch.fetch(
        url=MODIFY_ENDPOINT + "?" + data,
        method=urlfetch.GET,
        validate_certificate=True)
    logging.debug(res.content)
    if res.status_code == 200:
        result = json.loads(res.content)
        ok = result.get('status', 0) == 1
        return ok
    else:
        logging.debug(res.headers)
    return False
Example #16
    def writeData(self):
        total_i = self.counters['run']
        while True:
            self.query = self._get_gql_query()
            if self.query:
                entities, self.cursor, more = self.KIND.gql(self.query).fetch_page(self.batch_size, start_cursor=self.cursor)
                if not entities:
                    logging.debug("No rows returned by query -- done")
                    return
                else:
                    logging.debug("Got %d rows" % len(entities))
                for entity in entities:
                    if entity:
                        ed = self.entityData(entity)
                    else:
                        continue
                    if self.report.ftype == REPORT.CSV:
                        csv.writer(self.gcs_file).writerow(tools.normalize_list_to_ascii(ed))
                    self.gcs_file.flush()

                    total_i += 1
                    self.counters['run'] += 1
                    if total_i % 100 == 0:
                        cancelled = self.updateProgressAndCheckIfCancelled()
                        if cancelled:
                            self.report.CleanDelete()
                            logging.debug("Worker cancelled by user, report deleted.")
                            return

                logging.debug("Batch of %d done" % len(entities))
                elapsed_ms = tools.unixtime() - self.worker_start
                elapsed = elapsed_ms / 1000
                if elapsed >= MAX_REQUEST_SECONDS or (tools.on_dev_server() and TEST_TOO_LONG_ON_EVERY_BATCH):
                    logging.debug("Elapsed %ss" % elapsed)
                    raise TooLongError()
Example #17
    def runBatch(self, records):
        '''Run processing on a batch of records.

        Processing has two main steps:
            1) Process each record and fire alarms when any of the task's
                rule conditions are met.
            2) For each processer defined, calculate the value defined by
                its expressions, and update an analysis object with the
                specified key name.
        '''
        # Standard processing (alarms)
        self.new_alarms = []
        for record in records:
            new_alarm = self.processRecord(record)
            if new_alarm:
                self.new_alarms.append(new_alarm)

        # Analysis processing
        if self.processers:
            for processer in self.processers:
                run_ms = tools.unixtime(records[-1].dt_recorded) if records else 0
                self._run_processer(processer, records=records, run_ms=run_ms)

        # TODO: Can we do this in finish?
        db.put(self.analyses.values())

        logging.debug("Ran batch of %d." % (len(records)))
        self.records_processed += len(records)
Example #18
    def processRecord(self, record):
        # Listen for alarms
        # TODO: delays between two data points > NO DATA
        alarm = None
        for i, rule in enumerate(self.rules):
            activate, deactivate, value = self.__update_condition_status(
                i, record)
            if activate:
                alarm, alarm_processers = Alarm.Create(self.sensor, rule,
                                                       record)
                alarm.put()
                if alarm_processers:
                    for processer in alarm_processers:
                        self._run_processer(processer,
                                            run_ms=tools.unixtime(
                                                alarm.dt_start))
                self.active_rules[i] = alarm
                self.recent_alarms[i].insert(0, alarm)  # Prepend
            elif deactivate:
                ar = self.active_rules[i]
                if ar:
                    ar.deactivate()
                    self.updated_alarm_dict[str(ar.key())] = ar
                    self.active_rules[i] = None

        self.last_record = record
        return alarm
Example #19
    def testUserGoogleSimpleAccountLinking(self):
        import jwt
        user = User.Create(email="*****@*****.**", g_id=USER_GOOGLE_ID)
        user.put()

        creation = int(tools.unixtime(ms=False))
        payload = {
            'iss': 'https://accounts.google.com',
            'aud': secrets.GOOGLE_CLIENT_ID,
            'sub': USER_GOOGLE_ID,
            'email': "*****@*****.**",
            'locale': "en_US",
            "iat": creation,
            "exp": creation + 60 * 60
        }
        params = {
            'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer',
            'intent': 'get',
            'assertion': jwt.encode(payload,
                                    secrets.GOOGLE_CLIENT_SECRET,
                                    algorithm='HS256')
        }
        response = self.post_json("/api/auth/google/token", params)
        token_type = response.get('token_type')
        self.assertEqual(token_type, 'bearer')
Example #20
 def finish(self, reportDone=True):
     """Called when the worker has finished, to allow for any final work to be done."""
     progress = None
     if reportDone:
         self.gcs_file.close()
         self.report.status = REPORT.DONE
         self.report.dt_generated = datetime.now()
         self.report.put()
         duration = self.report.get_duration()
         logging.debug(
             "GCSReportWorker finished. Counters: %s. Report ran for %d seconds."
             % (self.counters, duration))
         progress = {
             "status": REPORT.DONE,
             "resource": self.report.get_gcs_file(),
             "generated": tools.unixtime(dt=self.report.dt_generated),
             "report": self.report.json(),
             "duration": duration
         }
     else:
         logging.debug("Batch finished. Counters: %s" % (self.counters))
     p = {'val': self.counters['run'], "filename": self.report.title}
     if progress:
         p.update(progress)
     self.setProgress(p)
     gc.collect()  # Garbage collector
Example #21
 def last_deactivation_ts(self, rule_index):
     alarms = self.recent_alarms[rule_index]
     if alarms:
         last_alarm = alarms[0]
         if last_alarm:
             return tools.unixtime(last_alarm.dt_end)
     return None
Example #22
 def testInSamePeriod(self):
     from constants import RULE
     volley = [
         # dt1, dt2, period_type, expect same (bool)
         (datetime(2016, 3, 31, 12, 15), datetime(2016, 3, 31, 12, 55), RULE.HOUR, True),
         (datetime(2016, 3, 31, 11, 58), datetime(2016, 3, 31, 12, 2), RULE.HOUR, False),
         (datetime(2016, 3, 31, 11, 58, 59), datetime(2016, 3, 31, 11, 58, 13), RULE.MINUTE, True),
         (datetime(2016, 3, 29), datetime(2016, 4, 1), RULE.WEEK, True),
         (datetime(2016, 3, 29), datetime(2016, 4, 4), RULE.WEEK, False),
         (datetime(2016, 1, 2), datetime(2016, 1, 28), RULE.MONTH, True),
         (datetime(2016, 1, 29), datetime(2015, 1, 4), RULE.MONTH, False)
     ]
     for v in volley:
         dt1, dt2, period_type, same = v
         ms1, ms2 = tools.unixtime(dt1), tools.unixtime(dt2)
         out = tools.in_same_period(ms1, ms2, period_type)
         self.assertEqual(out, same)
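
tools.in_same_period itself is not shown on this page. A minimal sketch consistent with the cases above, assuming millisecond inputs and the RULE constants imported in the test; this is an illustration, not the project's implementation:

from datetime import datetime

from constants import RULE  # assumed to define MINUTE, HOUR, WEEK, MONTH

def in_same_period(ms1, ms2, period_type):
    # Hypothetical sketch: True when both millisecond timestamps fall in
    # the same calendar period (minute, hour, ISO week, or month).
    dt1 = datetime.fromtimestamp(ms1 / 1000.0)
    dt2 = datetime.fromtimestamp(ms2 / 1000.0)
    if period_type == RULE.MINUTE:
        key = lambda dt: (dt.year, dt.month, dt.day, dt.hour, dt.minute)
    elif period_type == RULE.HOUR:
        key = lambda dt: (dt.year, dt.month, dt.day, dt.hour)
    elif period_type == RULE.WEEK:
        key = lambda dt: dt.isocalendar()[:2]  # (ISO year, ISO week)
    elif period_type == RULE.MONTH:
        key = lambda dt: (dt.year, dt.month)
    else:
        raise ValueError("Unknown period type: %s" % period_type)
    return key(dt1) == key(dt2)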
Example #23
 def __evalAggregateColumn(self, toks):
     column = toks[0]
     if not self.record_list:
         raise Exception("Can't evaluate aggregate column without record list")
     if column == 'ts':
         res = [tools.unixtime(r.dt_recorded) for r in self.record_list]
     else:
         res = [r.columnValue(column, 0) for r in self.record_list]
     return [res]
Example #24
    def post(self, d):
        user = None
        message = email = None
        auth = self.request.get('auth')
        pw = self.request.get('_pw')
        _login = self.request.get('_login')
        token = self.request.get('_token') # Google ID Token
        name = self.request.get('name')
        custom_attrs = self.request.get('custom_attrs')
        if custom_attrs:
            custom_attrs = custom_attrs.split(',')
        else:
            custom_attrs = None
        error_code = 0
        ok = False
        user = User.FuzzyGet(_login)
        if user:
            ok = False
            if (pw and user.validatePassword(pw)):
                ok = True
            elif token:
                ok = services.VerifyGoogleJWT(token, email=email)
                if ok:
                    user.session_id_token = str(token)
                    logging.debug("User token is now: %s" % user.session_id_token)
                else:
                    logging.debug("JWT invalid")
                    # Assume Google certs expired and retry
                    services.UpdateGoogleKeyCerts()
                    error_code = 2 # Bad token
            if ok:
                message = "Successful Login"
                self.session['user'] = user
                self.session['enterprise'] = user.enterprise
            else:
                user = None
                error_code = 1 # Unauthorized
                message = "Login / password mismatch"
        elif token:
            # No user, but this is an authenticated G+ login, so let's create the account
            ok = services.VerifyGoogleJWT(token, email=email)
            if ok:
                user = User.Create(email=email, name=name)
                if user:
                    user.session_id_token = str(token)
                    user.put()
        else:
            message = "User not found"
            error_code = 3

        data = {
            'ts': tools.unixtime(),
            'user': user.json(custom_attrs=custom_attrs) if user else None,
            'password': pw
        }
        self.json_out(data, message=message, error=error_code)
Example #25
    def writeData(self):
        total_i = self.counters['run']
        while True:
            self.query = self._get_query()
            if self.query:
                entities = self.query.fetch(limit=self.batch_size)
                self.cursor = self._get_cursor()
                if not entities:
                    logging.debug("No rows returned by query -- done")
                    return
                else:
                    logging.debug("Got %d rows" % len(entities))
                if entities and self.prefetch_props:
                    entities = tools.prefetch_reference_properties(
                        entities, *self.prefetch_props, missingRefNone=True)
                for entity in entities:
                    if entity:
                        ed = self.entityData(entity)
                    else:
                        continue
                    string = '?'
                    if self.report.ftype == REPORT.CSV:
                        csv.writer(self.gcs_file).writerow(
                            tools.normalize_list_to_ascii(ed))
                    elif self.report.ftype == REPORT.XLS:
                        self.gcs_file.write(json.dumps(ed) + "\n")
                        if total_i > REPORT.XLS_ROW_LIMIT:
                            self.setProgress({
                                'error': "XLS row limit (%d) exceeded!" % REPORT.XLS_ROW_LIMIT,
                                'status': REPORT.ERROR
                            })
                            return
                    self.gcs_file.flush()

                    total_i += 1
                    self.counters['run'] += 1
                    if total_i % 100 == 0:
                        cancelled = self.updateProgressAndCheckIfCancelled()
                        if cancelled:
                            self.report.CleanDelete()
                            logging.debug(
                                "Worker cancelled by user, report deleted.")
                            return

                logging.debug("Batch of %d done" % len(entities))
                elapsed_ms = tools.unixtime() - self.worker_start
                elapsed = elapsed_ms / 1000
                if elapsed >= MAX_REQUEST_SECONDS or (
                        tools.on_dev_server()
                        and TEST_TOO_LONG_ON_EVERY_BATCH):
                    logging.debug("Elapsed %ss" % elapsed)
                    raise TooLongError()
Example #26
 def json(self):
     return {
         'id': self.key.id(),
         'ts': tools.unixtime(self.date),
         'host': self.host,
         'path': self.path,
         'method': self.method,
         'status': self.status,
         'request': self.request,
         'success': self.success,
         'message': self.message
     }
Example #27
 def testInSamePeriod(self):
     from constants import RULE
     volley = [
         # dt1, dt2, period_type, expect same (bool)
         (datetime(2016, 3, 31, 12, 15), datetime(2016, 3, 31, 12, 55), RULE.HOUR, True),
         (datetime(2016, 3, 31, 11, 58), datetime(2016, 3, 31, 12, 2), RULE.HOUR, False),
         (datetime(2016, 3, 31, 11, 58, 59), datetime(2016, 3, 31, 11, 58, 13), RULE.MINUTE, True),
         (datetime(2016, 3, 29), datetime(2016, 4, 1), RULE.WEEK, True),
         (datetime(2016, 3, 29), datetime(2016, 4, 4), RULE.WEEK, False),
         (datetime(2016, 1, 2), datetime(2016, 1, 28), RULE.MONTH, True),
         (datetime(2016, 1, 29), datetime(2015, 1, 4), RULE.MONTH, False)
     ]
     for v in volley:
         dt1, dt2, period_type, same = v
         ms1, ms2 = tools.unixtime(dt1), tools.unixtime(dt2)
         out = tools.in_same_period(ms1, ms2, period_type)
         self.assertEqual(out, same)
Example #28
    def testAggregatedExpressionParsing(self):
        from models import Record
        record_list = []
        start_ms = tools.unixtime()
        ts_data = [
            long(start_ms + x) for x in range(0, 10 * 10 * 1000, 10 * 1000)
        ]  # 10 sec apart
        x_data = [4, 5, 6, 7, 5, 2, 1, 0, 1, 4]
        y_data = [0, 0, 1.0, 1.0, 1.0, 1, 0, 0, 0, 0]
        for i, ts, x, y in zip(range(10), ts_data, x_data, y_data):
            r = Record()
            r.setColumnValue("_ts", ts)
            r.setColumnValue("x", x)
            r.setColumnValue("y", y)
            record_list.append(r)
        now_ms = tools.unixtime()
        import numpy as np
        volley = [
            ["DOT({_ts},{y})", np.dot(ts_data, y_data)],
            ["MAX({y})", max(y_data)],
            ["MIN({y})", 0],
            ["AVE({x})", tools.average(x_data)],
            ["COUNT({y})", 10],
            ["DOT(DELTA({_ts}), {y}) / 1000", 40]  # 40 secs
        ]

        for v in volley:
            expr = v[0]
            target = v[1]
            tick = datetime.now()
            ep = ExpressionParser(expr, verbose=True, run_ms=now_ms)
            result = ep.run(record_list=record_list)
            tock = datetime.now()
            diff = tock - tick
            ms = diff.microseconds / 1000
            logmessage = "%s took %d ms" % (expr, ms)
            if ms > 100:
                logmessage += " <<<<<<<<<<<<<<<<<<<<<<<<<<< SLOW OP!"
            print logmessage
            self.assertEqual(result, target)
Example #29
    def finish(self, reportDone=True):
        """Called when the worker has finished, to allow for any final work to be done."""
        progress = None
        if reportDone:
            if self.report.ftype == REPORT.XLS:
                self.gcs_file.close()
                readable_gcs_file = gcs.open(self.gcs_file.name, 'r')
                data = readable_gcs_file.read().split("\n")
                readable_gcs_file.close()
                self.gcs_file = gcs.open(self.gcs_file.name, 'w')
                y = 0
                for r in data:
                    if not r:
                        continue
                    if y > REPORT.XLS_ROW_LIMIT:
                        logging.warning("Excel report exceeded row limit and was truncated")
                        break
                    y += 1
                    row = []
                    try:
                        row = json.loads(r)
                    except Exception, ex:
                        logging.error("Unable to json load row: %s (%s)" % (r, ex))
                    else:
                        for x, cell in enumerate(row):
                            if cell:
                                if x in self.report.date_columns:
                                    self.ws.write(y, x, cell, self.xls_styles['datetime'])
                                else:
                                    self.ws.write(y, x, cell)            
                        if self.make_sub_reports:
                            #TODO: Write section_work_sheet, survey to excel is not enabled for now though
                            pass
                self.wb.save(self.gcs_file)

            self.gcs_file.close()            
            if self.has_section_files():
                for section_gcs_file in self.section_gcs_files:
                    section_gcs_file.close()

            self.report.status = REPORT.DONE
            self.report.dt_generated = datetime.now()
            self.report.put()
            duration = self.report.getDuration()
            logging.debug("GCSReportWorker finished. Counters: %s. Report ran for %d seconds." % (self.counters, duration))
            progress = {
                "status": REPORT.DONE,
                "resource":self.report.getGCSFile(),
                "generated": tools.unixtime(dt=self.report.dt_generated),
                "report": self.report.json(),
                "duration": duration
            }
Example #30
    def finish(self, reportDone=True):
        """Called when the worker has finished, to allow for any final work to be done."""
        progress = None
        if reportDone:
            if self.report.ftype == REPORT.XLS:
                self.gcs_file.close()
                readable_gcs_file = gcs.open(self.gcs_file.name, 'r')
                data = readable_gcs_file.read().split("\n")
                readable_gcs_file.close()
                self.gcs_file = gcs.open(self.gcs_file.name, 'w')
                y = 0
                for r in data:
                    if not r:
                        continue
                    if y > REPORT.XLS_ROW_LIMIT:
                        logging.warning(
                            "Excel report exceeded row limit and was truncated"
                        )
                        break
                    y += 1
                    row = []
                    try:
                        row = json.loads(r)
                    except Exception, ex:
                        logging.error("Unable to json load row: %s (%s)" %
                                      (r, ex))
                    else:
                        for x, cell in enumerate(row):
                            if cell:
                                if x in self.report.date_columns:
                                    self.ws.write(y, x, cell,
                                                  self.xls_styles['datetime'])
                                else:
                                    self.ws.write(y, x, cell)
                self.wb.save(self.gcs_file)

            self.gcs_file.close()
            self.report.status = REPORT.DONE
            self.report.dt_generated = datetime.now()
            self.report.put()
            duration = self.report.getDuration()
            logging.debug(
                "GCSReportWorker finished. Counters: %s. Report ran for %d seconds."
                % (self.counters, duration))
            progress = {
                "status": REPORT.DONE,
                "resource": self.report.getGCSFile(),
                "generated": tools.unixtime(dt=self.report.dt_generated),
                "report": self.report.json(),
                "duration": duration
            }
Example #31
 def json(self):
     data = {
         'id': self.key.id(),
         'level': self.level,
         'level_name': self.print_level(),
         'name': self.name,
         'email': self.email,
         'phone': self.phone,
         'location_text': self.location_text,
         'ts_created': tools.unixtime(self.dt_created),
         'services_enabled': self.services_enabled,
         'service_settings': tools.getJson(self.service_settings)
     }
     credentials = tools.getJson(self.credentials)
     if credentials:
         data['scopes'] = credentials.get('scopes')
     return data
Example #32
    def writeData(self):
        total_i = self.counters['run']
        while True:
            self.query = self._get_query()
            if self.query:
                entities = self.query.fetch(limit=self.batch_size)
                self.cursor = self._get_cursor()                
                if not entities:
                    logging.debug("No rows returned by query -- done")
                    return
                if entities and self.prefetch_props:
                    entities = tools.prefetch_reference_properties(entities, *self.prefetch_props, missingRefNone=True)
                for entity in entities:
                    if entity:
                        ed = self.entityData(entity)
                    else:
                        continue
                    string = '?'
                    sections_data = None  # set by the sub-report variant of entityData (not shown in this excerpt)
                    if self.report.ftype == REPORT.CSV:
                        csv.writer(self.gcs_file).writerow(tools.normalize_list_to_ascii(ed))
                        if sections_data and self.has_section_files():
                            for section_gcs_file, sd in zip(self.section_gcs_files, sections_data):
                                for sd_rows in zip(*sd):
                                    csv.writer(section_gcs_file).writerow(tools.normalize_list_to_ascii(sd_rows))
                    elif self.report.ftype == REPORT.XLS:
                        self.gcs_file.write(json.dumps(ed) + "\n")
                        if total_i > REPORT.XLS_ROW_LIMIT:
                            self.setProgress({'error': "XLS row limit (%d) exceeded!" % REPORT.XLS_ROW_LIMIT, 'status': REPORT.ERROR})
                            return
                    self.gcs_file.flush()

                    total_i += 1
                    self.counters['run'] += 1
                    if total_i % 100 == 0:
                        cancelled = self.updateProgressAndCheckIfCancelled()
                        if cancelled:
                            self.report.CleanDelete()
                            logging.debug("Worker cancelled by user, report deleted.")
                            return

                logging.debug("Batch of %d done" % len(entities))
                elapsed = tools.unixtime(local=False) - self.worker_start
                if elapsed >= MAX_REQUEST_SECONDS or (tools.on_dev_server() and TEST_TOO_LONG_ON_EVERY_BATCH):
                    logging.debug("Elapsed %ss" % elapsed)
                    raise TooLongError()
Example #33
    def __init__(self,
                 rkey,
                 start_att="__key__",
                 start_att_desc=False,
                 title="Report"):
        self.report = rkey.get()
        if not self.report:
            logging.error("Error retrieving report [ %s ] from db" % rkey)
            return
        self.start_att = start_att
        self.start_att_desc = start_att_desc
        self.FILTERS = []
        self.report.status = REPORT.GENERATING
        self.specs = self.report.get_specs()
        self.start_ts = self.specs.get('start', 0)
        self.end_ts = self.specs.get('end', 0)
        self.report.generate_title(title,
                                   ts_start=self.start_ts,
                                   ts_end=self.end_ts)
        self.report.put()
        self.add_date_filters(start=self.start_ts, end=self.end_ts)
        self.user = self.report.key.parent().get()
        self.ancestor = self.user
        self.counters = {'run': 0, 'skipped': 0}
        self.worker_start = tools.unixtime()
        self.cursor = None
        self.worker_cancelled = False
        self.prefetch_props = []
        self.date_columns = []
        self.headers = []
        self.projection = None
        self.cursor = None
        self.query = None
        self.batch_size = 1000
        self.report_prog_mckey = MC_EXPORT_STATUS % self.report.key
        self.setProgress({'val': 0, "status": REPORT.GENERATING})
        self.gcs_file = gcs.open(self.get_gcs_filename(), 'w')

        # From: https://code.google.com/p/googleappengine/issues/detail?id=8809
        logservice.AUTOFLUSH_ENABLED = True
        logservice.AUTOFLUSH_EVERY_BYTES = None
        logservice.AUTOFLUSH_EVERY_SECONDS = 1
        logservice.AUTOFLUSH_EVERY_BYTES = 1024
        logservice.AUTOFLUSH_EVERY_LINES = 1
Example #34
    def run(self, start_cursor=None):
        self.worker_start = tools.unixtime()
        self.cursor = start_cursor

        if not start_cursor:
            self.writeHeaders()

        try:
            # This is heavy
            self.writeData()
        except TooLongError:
            logging.debug("TooLongError: Going to the next batch")
            if self.report:
                self.finish(reportDone=False)
                tools.safe_add_task(self.run, start_cursor=self._get_cursor(), _queue="report-queue")
        except Exception, e:  # including DeadlineExceededError
            traceback.print_exc()
            logging.error("Error: %s" % e)
            self.setProgress({'error': "Error occurred: %s" % e, 'status': REPORT.ERROR})
            return
Example #35
 def pocket_sync(self, d):
     '''
     Sync from pocket since last sync
     '''
     from services import pocket
     TS_KEY = 'pocket_last_timestamp'  # Seconds
     access_token = self.user.get_integration_prop('pocket_access_token')
     init_sync_since = tools.unixtime(datetime.now() - timedelta(days=7), ms=False)
     last_timestamp = self.user.get_integration_prop(TS_KEY, init_sync_since)
     readables = []
     if access_token:
         self.success, readables, latest_timestamp = pocket.sync(self.user, access_token, last_timestamp)
         self.user.set_integration_prop(TS_KEY, latest_timestamp)
         self.user.put()
         self.update_session_user(self.user)
     else:
         self.message = "Please link your Pocket account from the integrations page"
     self.set_response({
         'readables': [r.json() for r in readables]
     })
Example #36
    def __init__(self, rkey, start_att="__key__", start_att_desc=False, title="Report"):
        self.report = rkey.get()
        if not self.report:
            logging.error("Error retrieving report [ %s ] from db" % rkey)
            return
        self.start_att = start_att
        self.start_att_desc = start_att_desc
        self.FILTERS = []
        self.report.status = REPORT.GENERATING
        self.specs = self.report.get_specs()
        self.start_ts = self.specs.get('start', 0)
        self.end_ts = self.specs.get('end', 0)
        self.report.generate_title(title, ts_start=self.start_ts, ts_end=self.end_ts)
        self.report.put()
        self.add_date_filters(start=self.start_ts, end=self.end_ts)
        self.user = self.report.key.parent().get()
        self.ancestor = self.user
        self.counters = {
            'run': 0,
            'skipped': 0
        }
        self.worker_start = tools.unixtime()
        self.cursor = None
        self.worker_cancelled = False
        self.prefetch_props = []
        self.date_columns = []
        self.headers = []
        self.projection = None
        self.cursor = None
        self.query = None
        self.batch_size = 1000
        self.report_prog_mckey = MC_EXPORT_STATUS % self.report.key
        self.setProgress({'val': 0, "status": REPORT.GENERATING})
        self.gcs_file = gcs.open(self.get_gcs_filename(), 'w')

        # From: https://code.google.com/p/googleappengine/issues/detail?id=8809
        logservice.AUTOFLUSH_ENABLED = True
        logservice.AUTOFLUSH_EVERY_BYTES = None
        logservice.AUTOFLUSH_EVERY_SECONDS = 1
        logservice.AUTOFLUSH_EVERY_BYTES = 1024
        logservice.AUTOFLUSH_EVERY_LINES = 1
Example #37
 def __createNewRecords(self, data, first_dt=None, interval_secs=3, sensor=None):
     if not sensor:
         sensor = self.vehicle_1
     now = first_dt if first_dt else datetime.now()
     records = []
     N = len(data.values()[0])
     for i in range(N):
         _r = {}
         for column, vals in data.items():
             _r[column] = vals[i]
         if 'ts' in data:
             # If ts passed in record, overrides
             now = util.ts_to_dt(data['ts'])
         else:
             now += timedelta(seconds=interval_secs)
         r = Record.Create(tools.unixtime(now), sensor, _r, allow_future=True)
         records.append(r)
     db.put(records)
     sensor.dt_updated = datetime.now()
     sensor.put()
     logging.debug("Created %d records" % len(records))
Example #38
 def __init__(self, rkey, start_att="__key__", start_att_direction="", make_sub_reports=False):
     self.report = Report.get(rkey)
     
     if not self.report:
         logging.error("Error retrieving report [ %s ] from db" % rkey)
         return                
     self.report.status = REPORT.GENERATING
     self.report.put()
     
     self.counters = {
         'run': 0,
         'skipped': 0
     }
     self.worker_start = tools.unixtime()
     self.cursor = None
     self.start_att = start_att
     self.start_att_direction = start_att_direction
     self.worker_cancelled = False
     self.prefetch_props = []
     self.date_columns = []
     self.headers = []
     self.date_att = None
     self.projection = None
     self.query = None
     self.batch_size = 300
     self.make_sub_reports = make_sub_reports
     self.report_prog_mckey = MC_EXPORT_STATUS % self.report.key()
     self.setProgress({'val': 0, "status": REPORT.GENERATING})
     self.gcs_file = gcs.open(self.getGCSFilename(), 'w')
     self.section_gcs_files = []
     self.setup()
         
     # From: https://code.google.com/p/googleappengine/issues/detail?id=8809
     logservice.AUTOFLUSH_ENABLED = True
     logservice.AUTOFLUSH_EVERY_BYTES = None
     logservice.AUTOFLUSH_EVERY_SECONDS = 1
     logservice.AUTOFLUSH_EVERY_BYTES = 1024
     logservice.AUTOFLUSH_EVERY_LINES = 1
Example #39
    def testUserGoogleSimpleAccountLinking(self):
        import jwt
        user = User.Create(email="*****@*****.**", g_id=USER_GOOGLE_ID)
        user.put()

        creation = int(tools.unixtime(ms=False))
        payload = {
            'iss': 'https://accounts.google.com',
            'aud': secrets.GOOGLE_CLIENT_ID,
            'sub': USER_GOOGLE_ID,
            'email': "*****@*****.**",
            'locale': "en_US",
            "iat": creation,
            "exp": creation + 60*60
        }
        params = {
            'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer',
            'intent': 'get',
            'assertion': jwt.encode(payload, secrets.GOOGLE_CLIENT_SECRET, algorithm='HS256')
        }
        response = self.post_json("/api/auth/google/token", params)
        token_type = response.get('token_type')
        self.assertEqual(token_type, 'bearer')
Example #40
def sync(user, access_token):
    '''
    Return JSON array {title, author, isbn, image}

    Sample dict from pocket:

    {u'resolved_url': u'https://arxiv.org/abs/1701.06538', u'given_title': u'', u'is_article': u'1', u'sort_id': 16, u'word_count': u'221', u'status': u'0', u'has_image': u'0', u'given_url': u'https://arxiv.org/abs/1701.06538', u'favorite': u'0', u'has_video': u'0', u'time_added': u'1485774143', u'time_updated': u'1485774143', u'time_read': u'0', u'excerpt': u'Authors: Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, Jeff Dean  Abstract: The capacity of a neural network to absorb information is limited by its number of parameters.', u'resolved_title': u'Title: Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer', u'authors': {u'32207876': {u'url': u'', u'author_id': u'32207876', u'item_id': u'1576987151', u'name': u'cscs.CLcs.NEstatstat.ML'}}, u'resolved_id': u'1576987151', u'item_id': u'1576987151', u'time_favorited': u'0', u'is_index': u'0'}
    {u'resolved_url': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-china/', u'given_title': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-c', u'is_article': u'1', u'sort_id': 99, u'word_count': u'800', u'status': u'1', u'has_image': u'0', u'given_url': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-china/?partner=rss&emc=rss&smid=tw-nytimes', u'favorite': u'0', u'has_video': u'0', u'time_added': u'1349951324', u'time_updated': u'1482284773', u'time_read': u'1482284772', u'excerpt': u'Your clothes, your child\u2019s toys, even the device you use to read these words may have been made in China. They are among the $100 billion of goods that the United States imports from China each year \u2014 an exchange that has become an important issue in the 2012 presidential campaign.', u'resolved_title': u'Looking Into the Eyes of &#8216;Made in China&#8217;', u'authors': {u'3024958': {u'url': u'', u'author_id': u'3024958', u'item_id': u'233921121', u'name': u'KERRI MACDONALD'}}, u'resolved_id': u'233843309', u'item_id': u'233921121', u'time_favorited': u'0', u'is_index': u'0'}
    '''
    dt = datetime.now() - timedelta(days=7)
    init_sync_since = tools.unixtime(dt, ms=False)
    TS_KEY = 'pocket_last_timestamp'  # Seconds
    since_timestamp = user.get_integration_prop(TS_KEY, init_sync_since)
    data = urllib.urlencode({
        'access_token': access_token,
        'consumer_key': POCKET_CONSUMER_KEY,
        'detailType': 'complete',
        'since': since_timestamp,
        'state': 'all'
    })
    success = False
    logging.debug("Syncing pocket for %s since %s" % (user, dt))
    res = urlfetch.fetch(
        url=GET_ENDPOINT,
        payload=data,
        method=urlfetch.POST,
        deadline=60,
        validate_certificate=True)
    logging.debug(res.status_code)
    latest_timestamp = 0
    readables = []
    if res.status_code == 200:
        data = json.loads(res.content)
        articles = data.get('list', {})
        latest_timestamp = data.get('since', 0) #?
        save = []
        USE_RESOLVED_TITLE = True
        if articles:
            for id, article in articles.items():
                source = 'pocket'
                if USE_RESOLVED_TITLE:
                    title = article.get('resolved_title')
                else:
                    title = article.get('given_title')
                url = article.get('given_url')
                status = article.get('status')
                authors = article.get('authors')
                excerpt = article.get('excerpt')
                images = article.get('images')
                time_added = int(article.get('time_added', 0)) * 1000
                time_read = int(article.get('time_read', 0)) * 1000
                dt_added = tools.dt_from_ts(time_added)
                dt_read = tools.dt_from_ts(time_read) if time_read else None
                tags = article.get('tags', {}).keys()
                word_count = int(article.get('word_count', 0))
                favorite = int(article.get('favorite', 0)) == 1
                image_url = None
                author = None
                if images:
                    first_image = images.get('1')
                    if first_image:
                        image_url = first_image.get('src')
                if authors:
                    author_keys = authors.keys()
                    if author_keys:
                        author = authors.get(author_keys[0], {}).get('name')
                archived = int(status) == 1
                read = archived and (not tags or 'unread' not in tags)
                r = Readable.CreateOrUpdate(user, source_id=id, title=title, url=url,
                                            image_url=image_url, author=author,
                                            excerpt=excerpt, favorite=favorite,
                                            dt_added=dt_added, word_count=word_count,
                                            dt_read=dt_read,
                                            tags=tags, source=source, read=read)
                if r:
                    r.Update(read=archived, favorite=favorite, dt_read=dt_read)
                    save.append(r)
                    readables.append(r)
        ndb.put_multi(save)  # Save all
        Readable.put_sd_batch(save)
        user.set_integration_prop(TS_KEY, latest_timestamp)
        success = True
    else:
        logging.debug(res.headers)
    return (success, readables, latest_timestamp)
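
sync returns a (success, readables, latest_timestamp) tuple. Note that it stores the new timestamp on the user via set_integration_prop but does not save the user entity itself, so a caller is expected to put() the user to make the next run incremental, as pocket_sync in Example #35 does. A hypothetical caller:

success, readables, latest_ts = sync(user, access_token)
if success:
    user.put()  # persist the updated pocket_last_timestamp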
Example #41
def sync(user, access_token):
    '''
    Return JSON array {title, author, isbn, image}

    Sample dict from pocket:

    {u'resolved_url': u'https://arxiv.org/abs/1701.06538', u'given_title': u'', u'is_article': u'1', u'sort_id': 16, u'word_count': u'221', u'status': u'0', u'has_image': u'0', u'given_url': u'https://arxiv.org/abs/1701.06538', u'favorite': u'0', u'has_video': u'0', u'time_added': u'1485774143', u'time_updated': u'1485774143', u'time_read': u'0', u'excerpt': u'Authors: Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, Jeff Dean  Abstract: The capacity of a neural network to absorb information is limited by its number of parameters.', u'resolved_title': u'Title: Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer', u'authors': {u'32207876': {u'url': u'', u'author_id': u'32207876', u'item_id': u'1576987151', u'name': u'cscs.CLcs.NEstatstat.ML'}}, u'resolved_id': u'1576987151', u'item_id': u'1576987151', u'time_favorited': u'0', u'is_index': u'0'}
    {u'resolved_url': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-china/', u'given_title': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-c', u'is_article': u'1', u'sort_id': 99, u'word_count': u'800', u'status': u'1', u'has_image': u'0', u'given_url': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-china/?partner=rss&emc=rss&smid=tw-nytimes', u'favorite': u'0', u'has_video': u'0', u'time_added': u'1349951324', u'time_updated': u'1482284773', u'time_read': u'1482284772', u'excerpt': u'Your clothes, your child\u2019s toys, even the device you use to read these words may have been made in China. They are among the $100 billion of goods that the United States imports from China each year \u2014 an exchange that has become an important issue in the 2012 presidential campaign.', u'resolved_title': u'Looking Into the Eyes of &#8216;Made in China&#8217;', u'authors': {u'3024958': {u'url': u'', u'author_id': u'3024958', u'item_id': u'233921121', u'name': u'KERRI MACDONALD'}}, u'resolved_id': u'233843309', u'item_id': u'233921121', u'time_favorited': u'0', u'is_index': u'0'}
    '''
    dt = datetime.now() - timedelta(days=7)
    init_sync_since = tools.unixtime(dt, ms=False)
    TS_KEY = 'pocket_last_timestamp'  # Seconds
    since_timestamp = user.get_integration_prop(TS_KEY, init_sync_since)
    data = urllib.urlencode({
        'access_token': access_token,
        'consumer_key': POCKET_CONSUMER_KEY,
        'detailType': 'complete',
        'since': since_timestamp,
        'state': 'all'
    })
    success = False
    logging.debug("Syncing pocket for %s since %s" % (user, dt))
    res = urlfetch.fetch(url=GET_ENDPOINT,
                         payload=data,
                         method=urlfetch.POST,
                         deadline=60,
                         validate_certificate=True)
    logging.debug(res.status_code)
    latest_timestamp = 0
    readables = []
    if res.status_code == 200:
        data = json.loads(res.content)
        articles = data.get('list', {})
        latest_timestamp = data.get('since', 0)  # Pocket's 'since' cursor; stored and passed back on the next sync
        save = []
        USE_RESOLVED_TITLE = True
        if articles:
            for id, article in articles.items():
                source = 'pocket'
                if USE_RESOLVED_TITLE:
                    title = article.get('resolved_title')
                else:
                    title = article.get('given_title')
                url = article.get('given_url')
                status = article.get('status')
                authors = article.get('authors')
                excerpt = article.get('excerpt')
                images = article.get('images')
                time_added = int(article.get('time_added', 0)) * 1000  # Pocket reports epoch seconds; convert to ms
                time_read = int(article.get('time_read', 0)) * 1000
                dt_added = tools.dt_from_ts(time_added)
                dt_read = tools.dt_from_ts(time_read) if time_read else None
                tags = article.get('tags', {}).keys()
                word_count = int(article.get('word_count', 0))
                favorite = int(article.get('favorite', 0)) == 1
                image_url = None
                author = None
                if images:
                    first_image = images.get('1')
                    if first_image:
                        image_url = first_image.get('src')
                if authors:
                    author_keys = authors.keys()
                    if author_keys:
                        author = authors.get(author_keys[0], {}).get('name')
                archived = int(status) == 1
                read = archived and (not tags or 'unread' not in tags)
                r = Readable.CreateOrUpdate(user,
                                            source_id=id,
                                            title=title,
                                            url=url,
                                            image_url=image_url,
                                            author=author,
                                            excerpt=excerpt,
                                            favorite=favorite,
                                            dt_added=dt_added,
                                            word_count=word_count,
                                            dt_read=dt_read,
                                            tags=tags,
                                            source=source,
                                            read=read)
                if r:
                    r.Update(read=archived, favorite=favorite, dt_read=dt_read)
                    save.append(r)
                    readables.append(r)
        ndb.put_multi(save)  # Save all
        Readable.put_sd_batch(save)
        user.set_integration_prop(TS_KEY, latest_timestamp)
        success = True
    else:
        logging.debug(res.headers)
    return (success, readables, latest_timestamp)
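For orientation, a minimal sketch of how a caller might consume sync()'s return value. The integration-prop key 'pocket_access_token' and the surrounding handler context are assumptions, not part of the original:

# Hypothetical caller -- only sync() and get_integration_prop() appear in the original
access_token = user.get_integration_prop('pocket_access_token', None)
if access_token:
    success, readables, latest_ts = sync(user, access_token)
    if success:
        logging.info("Got %d readables, cursor now %s" % (len(readables), latest_ts))
    else:
        logging.warning("Pocket sync failed for %s" % user)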
Example #42
0
    def __evalFunction(self, toks):
        val = toks[0]
        fnName = val[0].upper()
        args = val[1:]
        args = [arg for arg in args if arg is not None]  # Filter nones
        if not args:
            return 0
        if fnName == 'SUM':
            args = self.__getArglist(args)
            if args:
                return [sum(args)]
            return [0]
        elif fnName == 'AVE':
            from tools import average
            args = self.__getArglist(args)
            if args:
                return [average(args)]
            return [0]
        elif fnName == 'MAX':
            args = self.__getArglist(args)
            if args:
                res = max(args)
                return [res]
            return [0]
        elif fnName == "MIN":
            args = self.__getArglist(args)
            if args:
                return [min(args)]
            return [0]
        elif fnName == "COUNT":
            args = self.__getArglist(args)
            return [len(args)]
        elif fnName == "ALARMS":
            from models import Alarm
            # Usage: ALARMS([rule_id])
            # Returns list of alarms in processed batch, optionally filtered by rule_id
            alarm_list = list(self.alarm_list)
            if args and type(args[0]) in [int, long, float]:
                rule_id = int(args[0])
                if rule_id:
                    alarm_list = [
                        al for al in alarm_list if tools.getKey(
                            Alarm, 'rule', al, asID=True) == rule_id
                    ]
            return [alarm_list]
        elif fnName == "DISTANCE":
            # Sums distance (meters) along the path of geopoint arguments
            dist = 0
            last_gp = None
            args = self.__getArglist(args)
            for gp in args:
                gp = tools.safe_geopoint(gp)
                if last_gp and gp:
                    dist += tools.calcLocDistance(last_gp, gp)
                if gp:
                    last_gp = gp
            return [dist]  # m
        elif fnName == "SQRT":
            arg = args[0]
            return [math.sqrt(arg)]
        elif fnName == "SINCE":
            # Returns ms since event (argument), or 0 if none found
            event = args[0]
            since = 0
            now = self.run_ms
            try:
                if event:
                    # int included alongside long: ms timestamps are plain ints on 64-bit builds
                    if type(event) in [int, long, float]:
                        # Treat as ms timestamp
                        since = now - event
                    elif isinstance(event, basestring):
                        pass
                    elif event.kind() == 'Alarm':
                        since = now - tools.unixtime(event.dt_start)
                    elif event.kind() == 'Record':
                        since = now - tools.unixtime(event.dt_recorded)
            except Exception as e:
                logging.warning("Error in SINCE() - %s" % e)
            return [since]
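As a design note, the if/elif ladder above could be condensed for the simple aggregates with a dispatch table. A minimal sketch, assuming vals is the flattened list that __getArglist produces (the eval_aggregate helper is hypothetical, not part of the original class):

# Sketch only: table-driven handlers for the simple aggregate functions
AGGREGATES = {
    'SUM': sum,
    'AVE': lambda vals: sum(vals) / float(len(vals)),  # inlined mean instead of tools.average
    'MAX': max,
    'MIN': min,
}

def eval_aggregate(fn_name, vals):
    if fn_name == 'COUNT':
        return [len(vals)]  # COUNT is well-defined even for an empty list
    if not vals:
        return [0]
    return [AGGREGATES[fn_name](vals)]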
Example #43
0
    def testSimpleExpressionParsing(self):
        from models import Record
        r = Record()
        x = 5
        y = -2
        z = 3.5
        r.setColumnValue("x", x)
        r.setColumnValue("y", y)
        r.setColumnValue("z", z)
        now_ms = tools.unixtime()
        volley = [
            ["1 + 1", (1 + 1)],
            ["1 + 1 + 5", (1 + 1 + 5)],
            ["2 * 8 + 3", (2 * 8) + 3],
            ["4 + 5 * 2", 4 + (5 * 2)],
            ["40000 / 1000", 40],
            ["2^3", (pow(2, 3))],
            ["(8/2)*3 + 9", ((8 / 2) * 3 + 9)],
            ["[x]^2", (pow(x, 2))],
            ["'a' * 3", 0],  # Non-numeric, treat as 0
            ["3.0 * 3", 9],
            ["SQRT([x]^2 + [y]^2)",
             math.sqrt(pow(x, 2) + pow(y, 2))],
            ["5 > 2", True],
            ["5 > 6", False],
            ["(3*5) < 20", True],
            ["[x] > 100", False],
            ["(3*5) < 20 AND [x] > 100", False],
            ["(3*5) < 20 AND [x] > 0 AND [x] > 1", True],
            ["1==1 OR 1==3 AND 2==0", True],
            ["(1==1 OR 1==3) AND 2==2", True],
            ["(1==2 AND 1==3) OR 2==2", True],
            ["(1==1 OR 1==1) AND 1==0", False],
            ["1==1 OR 1==1 AND 1==0", True],  # And first
            ["1==1 OR (1==1 AND 1==0)", True],
            ["1 == 2 OR [x] > 100 OR [x] > 1", True],
            ["\"ONE\" == \"ONE\"", True],
            ["\"ONE / (1)\" == \"ONE / (1)\"", True],
            ["1==2 OR 1==1 OR 1==4 OR 1==5", True],
            ["SINCE(1467011405000)", now_ms - 1467011405000],
            ["SQRT([x]^2 + [y]^2)", (math.sqrt(pow(x, 2) + pow(y, 2)))],
            [
                "SQRT([x]^2 + [y]^2 + 8^2)",
                (math.sqrt(pow(x, 2) + pow(y, 2) + pow(8, 2)))
            ],
            [
                "SQRT([x]^2 + [y]^2 + [z]^2)",
                (math.sqrt(pow(x, 2) + pow(y, 2) + pow(z, 2)))
            ]
        ]

        for v in volley:
            expr = v[0]
            target = v[1]
            tick = datetime.now()
            ep = ExpressionParser(expr, verbose=True, run_ms=now_ms)
            result = ep.run(r)
            tock = datetime.now()
            diff = tock - tick
            ms = int(diff.total_seconds() * 1000)  # count whole seconds too, not just the microseconds component
            logmessage = "%s took %d ms" % (expr, ms)
            if ms > 100:
                logmessage += " <<<<<<<<<<<<<<<<<<<<<<<<<<< SLOW OP!"
            print logmessage
            self.assertEqual(result, target)
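Outside the test harness, the parser can be exercised the same way; a minimal sketch using only the calls shown above (the column name "speed" is arbitrary):

r = Record()
r.setColumnValue("speed", 12.5)
ep = ExpressionParser("[speed] > 10", run_ms=tools.unixtime())
print ep.run(r)  # True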