Example #1
    def send_to_api(self, path, post, access_token):
        ctx = ndb.get_context()
        try:
            resp = yield ctx.urlfetch('https://alpha-api.app.net/stream/0/%s' % path, payload=json.dumps(post), deadline=30,
                                      method='POST', headers={
                                          'Authorization': 'Bearer %s' % access_token,
                                          'Content-Type': 'application/json',
                                      })
        except:
            logger.exception('Failed to post path: %s data: %s' % (path, post))
            raise deferred.SingularTaskFailure()

        parsed_resp = json.loads(resp.content)
        if resp.status_code == 401:
            logger.info('unauthorized')
            yield self.handle_unauthorized(parsed_resp, post)
            raise deferred.PermanentTaskFailure()
        elif resp.status_code == 200:
            self.handle_success(parsed_resp, post)
        elif resp.status_code == 400:
            yield self.handle_bad_response(parsed_resp, post)
            raise deferred.PermanentTaskFailure()
        elif resp.status_code == 403:
            yield self.handle_forbidden(parsed_resp, post)
            raise deferred.PermanentTaskFailure()
        else:
            logger.warn("Couldn't post entry key=%s. Error: %s Post:%s", self.entry_key, parsed_resp, post)
            raise deferred.SingularTaskFailure()
Example #2
def deferred_ratings():
    """ This is the deferred ratings table calculation process """
    # Disable the in-context cache to save memory
    # (it doesn't give any speed advantage for this processing)
    Context.disable_cache()
    t0 = time.time()

    try:

        _create_ratings()

    except DeadlineExceededError:
        # Hit deadline: log the error and raise PermanentTaskFailure
        # so that this task is not retried
        logging.error(u"Deadline exceeded in ratings, failing permanently")
        raise deferred.PermanentTaskFailure()

    except Exception as ex:
        logging.error(
            u"Exception in ratings, failing permanently: {0}".format(ex))
        # Avoid having the task retried
        raise deferred.PermanentTaskFailure()

    t1 = time.time()

    logging.info(u"Ratings calculation finished in {0:.2f} seconds".format(t1 - t0))
    StatsModel.log_cache_stats()
    # Do not maintain the cache in memory between runs
    StatsModel.clear_cache()
Example #3
def _put_annotations_batch(annotation_data):
    try:
        annotation_objects = []
        qry = BookRecord.query(
            BookRecord.item_id_array.IN(annotation_data.keys()))
        keys = qry.fetch(MAX_OBJECTS_PER_BATCH, keys_only=True)
        for key in keys:
            first_item_id = key.string_id().split('|')[0]

            if first_item_id not in annotation_data:
                continue

            short_text = annotation_data[first_item_id][0]
            long_text = annotation_data[first_item_id][1]

            anno_key = ndb.Key(BookAnnotation, key.string_id())
            annotation = BookAnnotation(key=anno_key,
                                        short=short_text,
                                        long=long_text)
            annotation_objects.append(annotation)
        ndb.put_multi(annotation_objects)
        logging.info("{} annotations put into datastore".format(
            len(annotation_objects)))
    except Exception as e:
        logging.error(e)
        raise deferred.PermanentTaskFailure()
Example #4
    def main(self, sequence_num):
        """Main method of the deferred task."""

        with Namespace(self._namespace):
            logging.info('Job started: %s w/ sequence number %d',
                         self._job_name, sequence_num)

            time_started = time.time()
            try:
                db.run_in_transaction(DurableJobEntity._start_job,
                                      self._job_name, sequence_num)
                result = self.run()
                db.run_in_transaction(DurableJobEntity._complete_job,
                                      self._job_name, sequence_num,
                                      transforms.dumps(result),
                                      long(time.time() - time_started))
                logging.info('Job completed: %s', self._job_name)
            except (Exception, runtime.DeadlineExceededError) as e:
                logging.error(traceback.format_exc())
                logging.error('Job failed: %s\n%s', self._job_name, e)
                db.run_in_transaction(DurableJobEntity._fail_job,
                                      self._job_name, sequence_num,
                                      traceback.format_exc(),
                                      long(time.time() - time_started))
                raise deferred.PermanentTaskFailure(e)
Example #5
    def main(self):
        """Main method of the deferred task."""
        logging.info('Job started: %s', self._job_name)

        time_started = time.time()
        old_namespace = namespace_manager.get_namespace()
        try:
            namespace_manager.set_namespace(self._namespace)
            try:
                db.run_in_transaction(DurableJobEntity._start_job,
                                      self._job_name)
                result = self.run()
                db.run_in_transaction(DurableJobEntity._complete_job,
                                      self._job_name, transforms.dumps(result),
                                      long(time.time() - time_started))
                logging.info('Job completed: %s', self._job_name)
            except (Exception, runtime.DeadlineExceededError) as e:
                logging.error(traceback.format_exc())
                logging.error('Job failed: %s\n%s', self._job_name, e)
                db.run_in_transaction(DurableJobEntity._fail_job,
                                      self._job_name, traceback.format_exc(),
                                      long(time.time() - time_started))
                raise deferred.PermanentTaskFailure(e)
        finally:
            namespace_manager.set_namespace(old_namespace)
Example #6
def deferred_update():
    """ Update all users in the datastore with lowercase nick and full name """
    logging.info("Deferred user update starting")
    CHUNK_SIZE = 200
    count = 0
    offset = 0
    Context.disable_cache()
    try:
        q = UserModel.query()
        while True:
            ulist = []
            chunk = 0
            for um in q.fetch(CHUNK_SIZE, offset = offset):
                chunk += 1
                if um.nick_lc is None:
                    try:
                        um.nick_lc = um.nickname.lower()
                        um.name_lc = um.prefs.get("full_name", "").lower() if um.prefs else ""
                        ulist.append(um)
                    except Exception as e:
                        logging.info("Exception in deferred_update() when setting nick_lc: {0}".format(e))
            if ulist:
                try:
                    ndb.put_multi(ulist)
                    count += len(ulist)
                except Exception as e:
                    logging.info("Exception in deferred_update() when updating ndb: {0}".format(e))
            if chunk < CHUNK_SIZE:
                break
            offset += CHUNK_SIZE
    except Exception as e:
        logging.info("Exception in deferred_update(): {0}, already updated {1} records".format(e, count))
        # Do not retry the task
        raise deferred.PermanentTaskFailure()
    logging.info("Completed updating {0} user records".format(count))
Example #7
def ExtractDetailsCrawlerIssueTracker(project_name, bug_id):
    """Extract useful information for a given bug."""
    logging.debug('Scraping details for bug %s in project %s.', bug_id,
                  project_name)
    phclient = gdata.projecthosting.client.ProjectHostingClient()
    try:
        query = gdata.projecthosting.client.Query(issue_id=bug_id)
        feed = phclient.get_issues(project_name, query=query)
    except gdata.client.RequestError as e:
        if ('HTTP_X_APPENGINE_TASKRETRYCOUNT' in environ and int(
                environ['HTTP_X_APPENGINE_TASKRETRYCOUNT']) < _MAX_RETRIES):
            if e.status == 403:  # Skip 403 (Unauthorized) errors.
                logging.info(
                    'Unauthorized to access this issue, skipping: %s, %s',
                    bug_id, project_name)

                # Nuke cache data for private bugs.
                url_bug_map.DeleteBugAndMappings(
                    bug_id, project_name, bugs_util.Provider.ISSUETRACKER)
                return
            else:
                raise BugCrawlerError(
                    'Error while trying to get details for %s. Error %s' %
                    (str(bug_id), str(e)))
        else:
            raise deferred.PermanentTaskFailure(
                'Error hit too many times, aborting '
                'extracting details for bug %s on project %s. Error: %s' %
                (str(bug_id), str(project_name), str(e)))
Example #8
    def main(self, sequence_num):
        """Main method of the deferred task."""

        with Namespace(self._namespace):
            logging.info('Job started: %s w/ sequence number %d',
                         self._job_name, sequence_num)

            time_started = time.time()
            try:
                # Check we haven't been canceled before we start.
                if self._already_finished(sequence_num):
                    logging.info(
                        'Job %s sequence %d already canceled or subsequent '
                        'run completed; not running this version.',
                        self._job_name, sequence_num)
                    return
                db.run_in_transaction(DurableJobEntity._start_job,
                                      self._job_name, sequence_num)
                result = self.run()
                db.run_in_transaction(DurableJobEntity._complete_job,
                                      self._job_name, sequence_num,
                                      transforms.dumps(result),
                                      long(time.time() - time_started))
                logging.info('Job completed: %s', self._job_name)
            except (Exception, runtime.DeadlineExceededError) as e:
                logging.error(traceback.format_exc())
                logging.error('Job failed: %s\n%s', self._job_name, e)
                db.run_in_transaction(DurableJobEntity._fail_job,
                                      self._job_name, sequence_num,
                                      traceback.format_exc(),
                                      long(time.time() - time_started))
                raise deferred.PermanentTaskFailure(e)
Example #9
def deferred_check_user(user_id):
    user = ModelProxy.User.get_by_id(user_id)
    if not user:
        raise deferred.PermanentTaskFailure("User id {} is invalid.".format(user_id))

    unique_email_address(user)
    unique_final_submission(user)
    unique_group(user)
Example #10
def insert_gtest_results(build_step_key):
    """Inserts GTest results into the datastore, replacing any existing ones.
  Also records used parser version."""
    step = BuildStep.get(build_step_key)

    log_contents = ''
    if step.log_gs:
        with cloudstorage.open(step.log_gs) as gs_file:
            log_contents = html2text(gs_file.read().decode('utf-8', 'replace'))
    else:
        try:
            blob_reader = blobstore.BlobReader(step.log_stdio)
            log_contents = html2text(blob_reader.read().decode(
                'utf-8', 'replace'))
        except (ValueError, blobstore.BlobNotFoundError) as e:
            raise deferred.PermanentTaskFailure(e)
    gtest_results = gtest_parser.parse(log_contents)

    to_put = []
    for fullname, result in gtest_results.iteritems():
        # Only store failure results.
        if result['is_successful']:
            continue

        if isinstance(result['log'], unicode):
            log = db.Text(result['log'])
        else:
            log = db.Text(result['log'], encoding='utf-8')
        result_entity = GTestResult(
            parent=db.Key.from_path('GTestResult', str(step.key())),
            build_step=step,
            time_finished=step.time_finished,
            gtest_parser_version=gtest_parser.VERSION,
            is_crash_or_hang=result['is_crash_or_hang'],
            fullname=fullname,
            run_time_ms=result['run_time_ms'],
            log=log)
        to_put.append(result_entity)
    for chunk in chunks(to_put, BATCH_SIZE):
        db.put(chunk)

    def tx_parser_version():
        step = BuildStep.get(build_step_key)
        orig_parser_version = step.gtest_parser_version
        if step.gtest_parser_version < gtest_parser.VERSION:
            step.gtest_parser_version = gtest_parser.VERSION
            step.put()
        return (orig_parser_version, step.gtest_parser_version)
    _, parser_version = \
        db.run_in_transaction_custom_retries(10, tx_parser_version)

    query = GTestResult.all(keys_only=True)
    query.filter('build_step =', build_step_key)
    query.filter('gtest_parser_version <', parser_version)
    db.delete(query)
Example #11
File: jobs.py Project: blckt/lib
    def main(self, sequence_num):
        logging.info('Drive job waking up')
        job = self.load()
        if not job:
            raise deferred.PermanentTaskFailure(
                'Job object for {} not found!'.format(self._job_name))
        if job.has_finished:
            return

        try:
            # pylint: disable=protected-access
            drive_manager = drive_api_manager._DriveManager.from_app_context(
                self._app_context)
            # pylint: enable=protected-access
        except errors.NotConfigured:
            self.complete(sequence_num)
            return
        except errors.Misconfigured as error:
            logging.error('%s: Drive is misconfigured in %s: %s',
                          self._job_name, self._app_context.get_title(), error)
            raise deferred.PermanentTaskFailure('Job {} failed: {}'.format(
                self._job_name, error))

        try:
            dto = self.get_a_valid_work_item()
        except IndexError:
            self.complete(sequence_num)
            return

        try:
            logging.info('Starting download of %s', dto.title)
            drive_manager.download_file(dto)
            logging.info('Finished download of %s', dto.title)
        except Exception as error:  #pylint: disable=broad-except
            # Normally errors.Error covers everything, but this covers the
            # possibility of an unexpected parse error.
            logging.info('Failed to sync %s (%s) from drive: %s', dto.title,
                         dto.key, error)

        deferred.defer(self.main, sequence_num)
Example #12
def update_majors():
    try:
        ndb.delete_multi(Major.get_majors().iter(keys_only=True))
        all_majors = get_majors()

        logging.info("All majors extracted: " + str(len(all_majors)))

        for major in all_majors:
            logging.info(major['discipline'] + " was created")
            deferred.defer(create_major, major)

    except Exception as e:
        logging.error(e)
        raise deferred.PermanentTaskFailure()
Example #13
def reparse_suppression_results(build_step_key, _build_step_name):
    step = BuildStep.get(build_step_key)

    log_contents = ''
    if step.log_gs:
        with cloudstorage.open(step.log_gs) as gs_file:
            log_contents = html2text(gs_file.read().decode('utf-8', 'replace'))
    else:
        try:
            blob_reader = blobstore.BlobReader(step.log_stdio)
            log_contents = html2text(blob_reader.read().decode(
                'utf-8', 'replace'))
        except (ValueError, blobstore.BlobNotFoundError) as e:
            raise deferred.PermanentTaskFailure(e)
Example #14
 def _run(self):
     self.job_dump.update_status(self.MAPPING)
     try:
         self.job_mapper.run()
         self.job_dump.update_status(self.REDUCING)
         self.job_reducer.run(self.job_dump.map_result)
         if not self.save_output:
             self.clean_up()
         self.job_dump.update_status(self.SUCCESS)
     except JobException as e:
         self.clean_up()
         self.job_dump.error = '%s: %s' % (e.job_type, e.message)
         self.job_dump.update_status(self.FAILURE)
         raise deferred.PermanentTaskFailure()
Example #15
def create_suggestions_json(suggestions_key):
    suggestions = suggestions_key.get()
    item_ids = suggestions.key.string_id()
    assert isinstance(suggestions, SuggestionsRecord)
    if not suggestions.completed:
        logging.warning(
            "create_suggestions_json for {} called although "
            "suggestions are still not completed.".format(item_ids))
        # deferred.defer(create_suggestions_json, suggestions_key,
        #                _countdown=3)
        return
    json_object = {
        'version': Suggester.VERSION,
        'item_ids': item_ids,
        'job_started': suggestions.job_started.isoformat()
    }
    original_book = suggestions.original_book.get()
    if not original_book:
        raise deferred.PermanentTaskFailure(
            "Original book not found for {} "
            "when creating JSON.".format(item_ids))
    assert isinstance(original_book, BookRecord)
    json_object['original_book'] = {
        'author': original_book.author,
        'title': original_book.title,
        'year': original_book.year
    }
    json_object['status'] = 'completed'
    json_object['suggestions'] = []
    book_records = ndb.get_multi(suggestions.books)
    assert len(book_records) == len(suggestions.books_prediction)
    for i in xrange(len(book_records)):
        book = book_records[i]
        assert isinstance(book, BookRecord)
        json_object['suggestions'].append({
            'author': book.author,
            'title': book.title,
            'item_ids': book.key.string_id(),
            'prediction': suggestions.books_prediction[i]
        })
    precomputed_json = json.dumps(json_object, indent=2)
    suggestions.json = precomputed_json
    suggestions.put()
    logging.info("Created and saved JSON for {}".format(item_ids))
Example #16
    def post(self):
        """Adds a BigQuery row to Datastore and streams it using a deferred task.

    Raises:
      deferred.PermanentTaskFailure: if we encounter any exception to avoid
        adding duplicate rows during task retries.
    """
        payload = pickle.loads(self.request.body)
        bigquery_row_model.BigQueryRow.add(**payload)
        try:
            if bigquery_row_model.BigQueryRow.threshold_reached():
                deferred.defer(bigquery_row_model.BigQueryRow.stream_rows)
            else:
                logging.info('Not streaming rows, thresholds not met.')
        except Exception as e:  # pylint: disable=broad-except
            raise deferred.PermanentTaskFailure(
                'Exception caught for BigQuery streaming: %s.' % e)
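The docstring above describes the pattern: each request records one BigQuery row, and a deferred task streams accumulated rows once a threshold is reached, with any exception converted into PermanentTaskFailure so a retry cannot insert duplicate rows. As a hedged sketch of the producer side, a row could be posted to this handler with a pickled payload; the URL, queue name, and row fields below are assumptions, not taken from the original project.

import pickle

from google.appengine.api import taskqueue

# Hypothetical enqueue call: the handler above unpickles the request body and
# passes the dict to bigquery_row_model.BigQueryRow.add(**payload).
row_fields = {'summary': 'example row'}  # assumed field names
taskqueue.add(url='/_ah/queue/bigquery-row',
              payload=pickle.dumps(row_fields),
              queue_name='bigquery-row')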
Example #17
 def wrapper(*args, **kwargs):
     """Wrapper for managed task decorator."""
     status_entity = bootstrap_status_model.BootstrapStatus.get_or_insert(
         task_function.__name__)
     status_entity.description = _TASK_DESCRIPTIONS.get(
         task_function.__name__, task_function.__name__)
     status_entity.timestamp = datetime.datetime.utcnow()
     try:
         task_function(*args, **kwargs)
         status_entity.success = True
         status_entity.details = None
         status_entity.put()
     except Exception as e:
         status_entity.success = False
         status_entity.details = '{} {}'.format(str(type(e)), str(e))
         status_entity.put()
         raise deferred.PermanentTaskFailure(
             'Task {} failed; error: {}'.format(task_function.__name__,
                                                status_entity.details))
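The snippet above shows only the inner wrapper; a minimal sketch of the decorator factory it implies, with an assumed name managed_task and an assumed example task, might look like this:

import functools


def managed_task(task_function):
    """Hypothetical decorator: record task status and convert failures.

    Any exception raised by the wrapped task is stored on its
    BootstrapStatus entity and re-raised as PermanentTaskFailure so the
    task queue does not retry it.
    """
    @functools.wraps(task_function)
    def wrapper(*args, **kwargs):
        # ... status bookkeeping and error handling as in the example above ...
        return task_function(*args, **kwargs)
    return wrapper


@managed_task
def bootstrap_example():
    """Example bootstrap task; enqueue it with deferred.defer(bootstrap_example)."""
    pass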
Example #18
def SpawnDetailsCrawlersIssueTracker(recent_issues,
                                     project_name,
                                     skip_recent_check=False):
    """Queues the tasks to do the actual crawling for recent updates."""
    count = 0
    try:
        for issue in recent_issues:
            bug_id = issue['id']
            logging.info(
                'Adding crawler to the queue for issue_id %s, project: %s.',
                bug_id, project_name)
            end = bug_id.find('/')
            if end > 0:
                bug_id = bug_id[0:end]

            bug = bugs.GetBug(bug_id=bug_id,
                              project=project_name,
                              provider=bugs_util.Provider.ISSUETRACKER)
            if bug:
                if not skip_recent_check and bug.last_update == issue[
                        'updated']:
                    logging.info('Bug %s is up-to-date.',
                                 bug.key().id_or_name())
                    count += 1
                    continue
                else:
                    logging.info('Bug %s needs to be updated.',
                                 bug.key().id_or_name())
            else:
                logging.info('Bug %s seems to be a new issue.', bug_id)
            deferred.defer(ExtractDetailsCrawlerIssueTracker,
                           project_name,
                           bug_id,
                           _queue='find-bugs-queue')
            count += 1
    except DeadlineExceededError:
        remaining = recent_issues[count:]
        deferred.defer(SpawnDetailsCrawlersIssueTracker, remaining,
                       project_name)
        raise deferred.PermanentTaskFailure(
            'Deadline exceeded, started a new SpawnDetailsCrawler'
            ' for the remaining %d urls.' % len(remaining))
Example #19
 def wrapper(*args, **kwargs):
     status_entity = bootstrap_status_model.BootstrapStatus.get_or_insert(
         task_function.__name__)
     docstring_first_line = re.search('^.*', task_function.__doc__)
     if docstring_first_line:
         status_entity.description = docstring_first_line.group(0)
     else:
         status_entity.description = task_function.__name__
     status_entity.timestamp = datetime.datetime.utcnow()
     try:
         task_function(*args, **kwargs)
         status_entity.success = True
         status_entity.details = None
         status_entity.put()
     except Exception as e:
         status_entity.success = False
         status_entity.details = '{} {}'.format(str(type(e)), str(e))
         status_entity.put()
         raise deferred.PermanentTaskFailure(
             'Task {} failed; error: {}'.format(task_function.__name__,
                                                status_entity.details))
Example #20
def update_annotations_from_csv(filename="anotace.csv"):
    logging.info("Starting to update from CSV")

    my_default_retry_params = gcs.RetryParams(initial_delay=0.2,
                                              max_delay=5.0,
                                              backoff_factor=2,
                                              max_retry_period=15)
    gcs.set_default_retry_params(my_default_retry_params)
    # bucket_name = os.environ.get('BUCKET_NAME',
    #                              app_identity.get_default_gcs_bucket_name())
    bucket_name = "tisic-knih.appspot.com"
    bucket = '/' + bucket_name
    filename = bucket + '/' + filename
    try:
        if is_dev_server():
            gcs_file = open("anotace.csv")
        else:
            gcs_file = gcs.open(filename)
        # gcs_file.seek(1000)
        r = ucsv.reader(gcs_file, encoding='utf-8')
        r.next()  # Skip first line (header).
        annotation_data = {}

        for row_number, row in enumerate(r):
            if row_number % 10000 == 0:
                logging.info("Processing row number {} of CSV file. ".format(
                    row_number))
            item_id = row[0]
            short_text = row[1]
            long_text = row[2]
            annotation_data[item_id] = (short_text, long_text)
            if len(annotation_data) > MAX_OBJECTS_PER_BATCH:
                deferred.defer(_put_annotations_batch, annotation_data)
                annotation_data = {}

        deferred.defer(_put_annotations_batch, annotation_data)
        gcs_file.close()
    except Exception as e:
        logging.error(e)
        raise deferred.PermanentTaskFailure()
Example #21
def __email_task():
    try:
        logging.info("Email task started")
        yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
        stale_game_q = Game.query(Game.active == True,
                                  Game.updated < yesterday)
        stale_games = {}
        for game in stale_game_q.iter():
            player_key = game.active_player_key
            if player_key not in stale_games:
                stale_games[player_key] = []
            stale_games[player_key].append(game.key.id())
        __send_emails(stale_games)
        logging.info("Email task complete")
    except Exception as e:
        # It can be dangerous to allow exceptions to bubble up to the job
        # scheduler in this case -- depending on what happens, it can get
        # stuck in a retry loop and start spamming our users.
        # Instead, log the problem and raise a "stop doing that!" exception.
        msg = "Unhandled exception during email task, aborting"
        logging.exception(msg)
        raise deferred.PermanentTaskFailure(msg)
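The comment above concerns the relationship between this task and the scheduler that triggers it: if an exception escaped, the queue would retry the task and the reminder emails could go out repeatedly. As a hedged sketch (the handler class, route, and queue name are assumptions), such a task is typically kicked off from a cron-triggered handler via deferred.defer:

import webapp2

from google.appengine.ext import deferred


def _enqueue_email_task():
    # Hand the work to the task queue; __email_task raises
    # PermanentTaskFailure on error, so the queue will not retry it.
    deferred.defer(__email_task, _queue='email-queue')


class EmailCronHandler(webapp2.RequestHandler):
    """Hypothetical cron endpoint for the stale-game email task."""

    def get(self):
        _enqueue_email_task()


app = webapp2.WSGIApplication([('/cron/stale_game_emails', EmailCronHandler)])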
Example #22
def CommitChangeSet(change_key):
    """Attempts to commit and delete a given RuleChangeSet."""
    change = change_key.get()
    if change is None:
        logging.info('Change no longer exists. (already committed?)')
        return

    logging.info('Committing a %s change set of %s rules for blockable %s',
                 change.change_type, len(change.rule_keys),
                 change.blockable_key.id())

    blockable = change.blockable_key.get()
    rules = ndb.get_multi(change.rule_keys)

    if change.change_type == constants.RULE_POLICY.WHITELIST:
        change_func = _Whitelist
    elif change.change_type == constants.RULE_POLICY.BLACKLIST:
        change_func = _Blacklist
    elif change.change_type == constants.RULE_POLICY.REMOVE:
        change_func = _Remove
    elif change.change_type in constants.RULE_POLICY.SET_INSTALLER:
        change_func = _ChangeInstallerState
    else:
        raise NotImplementedError

    try:
        change_func(blockable, rules)
    except api.RequestError:
        # For normal request errors, rely on the builtin task queue retry settings.
        raise
    except Exception as e:  # pylint: disable=broad-except
        # For all other (likely fatal) errors, make sure the task doesn't retry.
        raise deferred.PermanentTaskFailure(repr(e))
    else:
        for rule in rules:
            rule.is_committed = True
        ndb.put_multi(rules)

        change.key.delete()
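The two except clauses above encode a retry policy: a transient api.RequestError is re-raised so the task queue's built-in retries handle it, while anything else is wrapped in PermanentTaskFailure so the task is dropped. A hedged sketch of how such a task could be enqueued with an explicit, bounded retry policy follows; the queue name and retry parameters are assumptions, and change is assumed to be a RuleChangeSet entity.

from google.appengine.api import taskqueue
from google.appengine.ext import deferred

# Bound the built-in retries used for transient RequestErrors; fatal errors
# raised as PermanentTaskFailure inside CommitChangeSet stop retries early.
retry_options = taskqueue.TaskRetryOptions(task_retry_limit=5,
                                           min_backoff_seconds=2)
deferred.defer(CommitChangeSet, change.key,
               _queue='rule-commit-queue',
               _retry_options=retry_options)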
Example #23
def create_major(obj):
    try:
        major_info = parse_url(obj['link'])
        # parsed is False only when every semester list of every year is empty
        parsed = any(
            major_info[year][semester]
            for year in ('first_year', 'second_year', 'third_year', 'fourth_year')
            for semester in ('first_semester', 'second_semester'))

        obj['major_description'] = json.dumps(major_info)
        obj['parsed'] = parsed
        Major.add_new(obj)
    except Exception as e:
        logging.error(e)
        raise deferred.PermanentTaskFailure()
Example #24
def read_ophan(section_id=None):

    client = memcache.Client()

    last_read = client.get(section_id + ".epoch_seconds")

    if last_read and fresh(last_read): return

    ophan_json = None

    if section_id == 'all':
        ophan_json = ophan.popular()
    else:
        ophan_json = ophan.popular(section_id=section_id)

    if not ophan_json:
        raise deferred.PermanentTaskFailure()

    ophan_data = json.loads(ophan_json)

    resolved_stories = [resolve_content(entry['url']) for entry in ophan_data]

    resolved_stories = [
        story for story in resolved_stories if story is not None
    ]

    client = memcache.Client()

    base_key = 'all'

    if section_id: base_key = section_id

    if len(resolved_stories) > 0:
        client.set(base_key, json.dumps(resolved_stories))
        client.set(base_key + '.epoch_seconds', time.time())

    logging.info("Updated data for section %s; listing %d stories" %
                 (section_id, len(resolved_stories)))
Example #25
    def main(self):
        """Main method of the deferred task."""
        logging.info('Job started: %s', self._job_name)

        time_started = time.time()
        old_namespace = namespace_manager.get_namespace()
        try:
            namespace_manager.set_namespace(self._namespace)
            try:
                result = self.run()
                DurableJobEntity.complete_job(self._job_name,
                                              json.dumps(result),
                                              long(time.time() - time_started))
                logging.info('Job completed: %s', self._job_name)
            except Exception as e:
                logging.error(traceback.format_exc())
                logging.error('Job failed: %s\n%s', self._job_name, e)
                DurableJobEntity.fail_job(self._job_name,
                                          traceback.format_exc(),
                                          long(time.time() - time_started))
                raise deferred.PermanentTaskFailure(e)
        finally:
            namespace_manager.set_namespace(old_namespace)
Example #26
                # Nuke cache data for private bugs.
                url_bug_map.DeleteBugAndMappings(
                    bug_id, project_name, bugs_util.Provider.ISSUETRACKER)
                return
            else:
                raise BugCrawlerError(
                    'Error while trying to get details for %s. Error %s' %
                    (str(bug_id), str(e)))
        else:
            raise deferred.PermanentTaskFailure(
                'Error hit too many times, aborting '
                'extracting details for bug %s on project %s. Error: %s' %
                (str(bug_id), str(project_name), str(e)))

    if not feed or not feed.entry:
        raise deferred.PermanentTaskFailure(
            'Failed to fetch full details for bug %s' % bug_id)

    entry = feed.entry[0]
    urls = []
    if entry.title.text:
        urls = [(u, url_bug_map.UrlPosition.TITLE)
                for u in url_util.ExtractUrls(entry.title.text)]
    if entry.content.text:
        urls.extend([(u, url_bug_map.UrlPosition.MAIN)
                     for u in url_util.ExtractUrls(entry.content.text)])

    comments = GetComments(project_name, bug_id, phclient)
    comments_text = GetTextInComments(comments)
    if comments_text:
        urls.extend([(u, url_bug_map.UrlPosition.COMMENTS)
                     for u in url_util.ExtractUrls(comments_text)])
Example #27
def _send_to_keen(survey_key, course_key, answer_keys):
    events = []
    student = Student.query(Student.user == survey_key.get().participant).get()
    course = course_key.get()
    answers = ndb.get_multi(answer_keys)
    for answer in answers:
        question = answer.question.get()
        if answer.string_value != '':
            response = urlfetch.fetch(
                'http://text-processing.com/api/sentiment',
                payload=urllib.urlencode({'text': answer.string_value}),
                method=urlfetch.POST)

            # If we've been throttled, just give up and die
            if response.status_code == 503:
                continue

            elif response.status_code != 200:
                raise deferred.PermanentTaskFailure()

            sentiment = json.loads(response.content)
            answer.sentiment = sentiment['label']
            answer.put()

        lecturer = course.lecturer.get()
        event = {
            'question_key': question.key.urlsafe(),
            'survey_key': answer.key.parent().urlsafe(),
            'course_key': course.key.urlsafe(),
            'course': {
                'name': course.course.get().name,
                'department': course.course.get().department.urlsafe(),
                'faculty': course.course.get().faculty.urlsafe(),
                'school': course.course.get().faculty.get().school.urlsafe(),
            },
            'question_number': question.number,
            'question_text': question.question,
            'lecturer': {
                'key': lecturer.key.urlsafe(),
                'name': lecturer.name,
                'department': lecturer.department.get().name,
                'faculty': lecturer.department.get().faculty.get().name,
            },
            'student': {
                'key': student.key.urlsafe(),
                'age': student.calculate_age(),
                'gender': student.gender,
                'status': student.status,
                'year': student.year,
            },
        }

        if question.question_type == 'closed':
            event['response'] = answer.int_value

        else:
            event['sentiment'] = answer.sentiment

        events.append(event)

    keen.add_events({'answers': events})
Example #28
    def _send_mail_task(cls,
                        notification_key,
                        payload_key,
                        test_send_mail_fn=None):
        exception = None
        failed_permanently = False
        now = datetime.datetime.utcnow()
        # pylint: disable=unbalanced-tuple-unpacking,unpacking-non-sequence
        notification, payload = db.get([notification_key, payload_key])
        # pylint: enable=unbalanced-tuple-unpacking,unpacking-non-sequence
        send_mail_fn = (test_send_mail_fn
                        if test_send_mail_fn else mail.send_mail)
        sent = False

        COUNTER_SEND_MAIL_TASK_STARTED.inc()

        if not notification:
            COUNTER_SEND_MAIL_TASK_FAILED_PERMANENTLY.inc()
            raise deferred.PermanentTaskFailure('Notification missing: ' +
                                                str(notification_key))

        if not payload:
            COUNTER_SEND_MAIL_TASK_FAILED_PERMANENTLY.inc()
            raise deferred.PermanentTaskFailure('Payload missing: ' +
                                                str(payload_key))

        policy = _RETENTION_POLICIES.get(notification._retention_policy)
        if not policy:
            COUNTER_SEND_MAIL_TASK_FAILED_PERMANENTLY.inc()
            raise deferred.PermanentTaskFailure('Unknown retention policy: ' +
                                                notification._retention_policy)

        if (cls._done(notification) or cls._failed(notification)
                or cls._sent(notification)):
            COUNTER_SEND_MAIL_TASK_SKIPPED.inc()
            COUNTER_SEND_MAIL_TASK_SUCCESS.inc()
            return

        if notification._recoverable_failure_count > _RECOVERABLE_FAILURE_CAP:
            message = (
                'Recoverable failure cap (%s) exceeded for notification with '
                'key %s') % (_RECOVERABLE_FAILURE_CAP, str(notification.key()))
            _LOG.error(message)
            permanent_failure = deferred.PermanentTaskFailure(message)

            try:
                COUNTER_SEND_MAIL_TASK_RECORD_FAILURE_CALLED.inc()
                cls._record_failure(notification,
                                    payload,
                                    permanent_failure,
                                    dt=now,
                                    permanent=True,
                                    policy=policy)
                COUNTER_SEND_MAIL_TASK_RECORD_FAILURE_SUCCESS.inc()
            # Must be vague. pylint: disable=broad-except
            except Exception as e:
                _LOG.error(
                    cls._get_record_failure_error_message(
                        notification, payload, e))
                COUNTER_SEND_MAIL_TASK_RECORD_FAILURE_FAILED.inc()

            COUNTER_SEND_MAIL_TASK_FAILED_PERMANENTLY.inc()
            COUNTER_SEND_MAIL_TASK_FAILURE_CAP_EXCEEDED.inc()

            raise permanent_failure
Example #29
def _run_stats(from_time, to_time):
    """ Runs a process to update user statistics and Elo ratings """

    logging.info(u"Generating stats from {0} to {1}".format(
        from_time, to_time))

    if from_time is None or to_time is None:
        # Time range must be specified
        return

    if from_time >= to_time:
        # Null time range
        return

    # Iterate over all finished games within the time span in temporal order
    q = GameModel.query(ndb.AND(GameModel.ts_last_move > from_time, GameModel.ts_last_move <= to_time)) \
        .order(GameModel.ts_last_move) \
        .filter(GameModel.over == True)

    # The accumulated user statistics
    users = dict()

    def _init_stat(user_id, robot_level):
        """ Returns the newest StatsModel instance available for the given user """
        return StatsModel.newest_before(from_time, user_id, robot_level)

    cnt = 0
    ts_last_processed = None

    try:
        # Use i as a progress counter
        i = 0
        for gm in iter_q(q, chunk_size=250):
            i += 1
            lm = Alphabet.format_timestamp(gm.ts_last_move or gm.timestamp)
            p0 = None if gm.player0 is None else gm.player0.id()
            p1 = None if gm.player1 is None else gm.player1.id()
            robot_game = (p0 is None) or (p1 is None)
            if robot_game:
                rl = gm.robot_level
            else:
                rl = 0
            s0 = gm.score0
            s1 = gm.score1

            if (s0 == 0) and (s1 == 0):
                # When a game ends by resigning immediately,
                # make sure that the weaker player
                # doesn't get Elo points for a draw; in fact,
                # ignore such a game altogether in the statistics
                continue

            if p0 is None:
                k0 = "robot-" + str(rl)
            else:
                k0 = p0
            if p1 is None:
                k1 = "robot-" + str(rl)
            else:
                k1 = p1

            if k0 in users:
                urec0 = users[k0]
            else:
                users[k0] = urec0 = _init_stat(p0, rl if p0 is None else 0)
            if k1 in users:
                urec1 = users[k1]
            else:
                users[k1] = urec1 = _init_stat(p1, rl if p1 is None else 0)
            # Number of games played
            urec0.games += 1
            urec1.games += 1
            if not robot_game:
                urec0.human_games += 1
                urec1.human_games += 1
            # Total scores
            urec0.score += s0
            urec1.score += s1
            urec0.score_against += s1
            urec1.score_against += s0
            if not robot_game:
                urec0.human_score += s0
                urec1.human_score += s1
                urec0.human_score_against += s1
                urec1.human_score_against += s0
            # Wins and losses
            if s0 > s1:
                urec0.wins += 1
                urec1.losses += 1
            elif s1 > s0:
                urec1.wins += 1
                urec0.losses += 1
            if not robot_game:
                if s0 > s1:
                    urec0.human_wins += 1
                    urec1.human_losses += 1
                elif s1 > s0:
                    urec1.human_wins += 1
                    urec0.human_losses += 1
            # Find out whether players are established or beginners
            est0 = urec0.games > ESTABLISHED_MARK
            est1 = urec1.games > ESTABLISHED_MARK
            # Save the Elo point state used in the calculation
            gm.elo0, gm.elo1 = urec0.elo, urec1.elo
            # Compute the Elo points of both players
            adj = _compute_elo((urec0.elo, urec1.elo), s0, s1, est0, est1)
            # When an established player is playing a beginner (provisional) player,
            # leave the Elo score of the established player unchanged
            # Adjust player 0
            if est0 and not est1:
                adj = (0, adj[1])
            gm.elo0_adj = adj[0]
            urec0.elo += adj[0]
            # Adjust player 1
            if est1 and not est0:
                adj = (adj[0], 0)
            gm.elo1_adj = adj[1]
            urec1.elo += adj[1]
            # If not a robot game, compute the human-only Elo
            if not robot_game:
                gm.human_elo0, gm.human_elo1 = urec0.human_elo, urec1.human_elo
                adj = _compute_elo((urec0.human_elo, urec1.human_elo), s0, s1,
                                   est0, est1)
                # Adjust player 0
                if est0 and not est1:
                    adj = (0, adj[1])
                gm.human_elo0_adj = adj[0]
                urec0.human_elo += adj[0]
                # Adjust player 1
                if est1 and not est0:
                    adj = (adj[0], 0)
                gm.human_elo1_adj = adj[1]
                urec1.human_elo += adj[1]
            # Save the game object with the new Elo adjustment statistics
            gm.put()
            # Save the last processed timestamp
            ts_last_processed = lm
            cnt += 1
            # Report on our progress
            if i % 1000 == 0:
                logging.info(u"Processed {0} games".format(i))

    except DeadlineExceededError as ex:
        # Hit deadline: save the stuff we already have and
        # defer a new task to continue where we left off
        logging.info(
            u"Deadline exceeded in stats loop after {0} games and {1} users".
            format(cnt, len(users)))
        logging.info(u"Resuming from timestamp {0}".format(ts_last_processed))
        if ts_last_processed is not None:
            _write_stats(ts_last_processed, users)
        deferred.defer(deferred_stats,
                       from_time=ts_last_processed or from_time,
                       to_time=to_time)
        # Normal return prevents this task from being run again
        return

    except Exception as ex:
        logging.info(u"Exception in stats loop: {0}".format(ex))
        # Avoid having the task retried
        raise deferred.PermanentTaskFailure()

    # Completed without incident
    logging.info(
        u"Normal completion of stats for {1} games and {0} users".format(
            len(users), cnt))

    _write_stats(to_time, users)
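_compute_elo is not shown above; the surrounding code only fixes its interface (a pair of current ratings, both scores, and two "established" flags, returning a pair of adjustments). As a point of reference, a minimal sketch of a standard Elo update with a larger K-factor for provisional players might look like the following; the K values are assumptions, not taken from the original code.

def _compute_elo(elo_pair, score0, score1, established0, established1):
    """Sketch: return (adj0, adj1), the Elo point adjustments for both players."""
    elo0, elo1 = elo_pair
    # Expected result for player 0 under the standard Elo model
    expected0 = 1.0 / (1.0 + 10.0 ** ((elo1 - elo0) / 400.0))
    # Actual result for player 0: win = 1, draw = 0.5, loss = 0
    if score0 > score1:
        actual0 = 1.0
    elif score0 < score1:
        actual0 = 0.0
    else:
        actual0 = 0.5
    # Provisional (non-established) players move faster (assumed K-factors)
    k0 = 16.0 if established0 else 32.0
    k1 = 16.0 if established1 else 32.0
    adj0 = int(round(k0 * (actual0 - expected0)))
    adj1 = int(round(k1 * (expected0 - actual0)))
    return adj0, adj1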
Example #30
def check_bq_job(job_id, item_ids, suggestions_key, page_token):
    bq = BigQueryClient()
    logging.info("Polling suggestion job {}.".format(job_id))
    # TODO: catch 404 errors for jobs created 24+ hours ago, retry with new jobid
    try:
        bq_json = bq.get_async_job_results(job_id, page_token,
                                           MAX_RESULTS_PER_SUGGESTIONS_QUERY)
    except Exception as e:
        logging.error("Error from BigQuery with item_id={}.".format(item_ids))
        raise deferred.PermanentTaskFailure(e)
    if not bq_json['jobComplete']:
        logging.info("- job not completed yet.")
        deferred.defer(check_bq_job,
                       job_id,
                       item_ids,
                       suggestions_key,
                       "",
                       _countdown=5)
        return
    if 'rows' not in bq_json:
        logging.error(
            u"Invalid json for BigQueryTable. Job for {} is probably "
            u"invalid (bad item_id?).\n"
            u"JSON:\n"
            u"{}".format(item_ids, bq_json))
        raise deferred.PermanentTaskFailure("No rows in BigQuery response for "
                                            "{}.".format(item_ids))
    table = BigQueryTable(bq_json)
    item_ids_array = item_ids.split('|')
    # Get the consolidated book for each item_id
    suggestions = suggestions_key.get()
    assert isinstance(suggestions, SuggestionsRecord)
    for row in table.data:
        item_id = row[0]
        prediction = float(row[1])
        if item_id in item_ids_array:
            continue  # Original book.
        consolidated_book_key = BookRecord.query(
            BookRecord.item_id_array == item_id).get(keys_only=True)
        if not consolidated_book_key:
            logging.info(
                "No consolidated book with item_id '{}' found.".format(
                    item_id))
            continue
        if consolidated_book_key not in suggestions.books:
            suggestions.books.append(consolidated_book_key)
            suggestions.books_prediction.append(prediction)
        if len(suggestions.books) >= 1000:
            break
    next_page_token = bq_json.get('pageToken', "")
    if next_page_token != "" and len(suggestions.books) < 1000:
        suggestions.put()
        deferred.defer(check_bq_job, job_id, item_ids, suggestions_key,
                       next_page_token)
        logging.info("Suggestions for item_ids '{}' partly fetched. "
                     "Running again.".format(item_ids))
    else:
        suggestions.completed = True
        suggestions.json_generation_started = True
        suggestions.put()
        logging.info(
            "Suggestions for item_ids '{}' completed and saved.".format(
                item_ids))
        deferred.defer(create_suggestions_json, suggestions.key)