Ejemplo n.º 1
0
    def main(self):
        """Write one student email per line to the file at self.args.path.

        The students come from common_utils.iter_all over models.Student.all(),
        read inside the namespace of the course named by
        self.etl_args.course_url_prefix.

        Exits through sys.exit when batch_size is below 1 or when the output
        path already exists.
        """
        # self.args holds this job's parsed arguments. These two checks go
        # beyond what argparse validates, so they are made by hand.
        args = self.args
        if args.batch_size < 1:
            sys.exit('--batch size must be positive')
        if os.path.exists(args.path):
            sys.exit('Cannot download to %s; file exists' % args.path)

        # self.etl_args holds the arguments given to etl.py itself; the course
        # URL prefix leads us to the course context and from there to its
        # namespace.
        course_context = etl_lib.get_context(self.etl_args.course_url_prefix)
        course_namespace = course_context.get_namespace_name()

        # Models are namespaced, so the datastore reads have to happen while
        # the course's namespace is active.
        with common_utils.Namespace(course_namespace):

            # Add filters to this base query to narrow the export.
            student_query = models.Student.all()
            all_students = common_utils.iter_all(
                student_query, args.batch_size)

            # Emit one email address per line.
            with open(args.path, 'w') as out:
                for student in all_students:
                    out.write(student.email)
                    out.write('\n')
Ejemplo n.º 2
0
    def main(self):
        """Write the key names of up to 1000 Student entities to a file.

        The keys are fetched with a keys-only query while the namespace of the
        course named by self.etl_args.course_url_prefix is active; each key
        name is then written on its own line to self.args.path.

        Exits through sys.exit when batch_size is below 1 or when the output
        path already exists.
        """
        # self.args holds this job's parsed arguments. These two checks go
        # beyond what argparse validates, so they are made by hand.
        args = self.args
        if args.batch_size < 1:
            sys.exit('--batch size must be positive')
        if os.path.exists(args.path):
            sys.exit('Cannot download to %s; file exists' % args.path)

        # self.etl_args holds the arguments given to etl.py itself; use the
        # course URL prefix to look up the course's namespace.
        course_context = etl_lib.get_context(self.etl_args.course_url_prefix)
        target_namespace = course_context.get_namespace_name()

        # The active namespace is global state: remember the current one,
        # switch to the course's namespace for the read, and always switch
        # back so later datastore operations are not affected.
        previous_namespace = namespace_manager.get_namespace()
        try:
            namespace_manager.set_namespace(target_namespace)
            # Only the first 1000 results are processed. A keys-only query is
            # enough because the student's email is key.name().
            keys_query = models.Student.all(keys_only=True)
            student_keys = keys_query.fetch(1000)
        finally:
            namespace_manager.set_namespace(previous_namespace)

        # One key name per line.
        with open(args.path, 'w') as out:
            for student_key in student_keys:
                out.write(str(student_key.name() + '\n'))
Ejemplo n.º 3
0
    def main(self):
        """Export student emails, one per line, to self.args.path.

        Students are produced by common_utils.iter_all over
        models.Student.all() with self.args.batch_size, read while the
        namespace of the requested course is active.

        Exits through sys.exit when batch_size is below 1 or when the output
        path already exists.
        """
        # Validation that argparse does not cover is done up front, on the
        # already-parsed self.args.
        parsed = self.args
        if parsed.batch_size < 1:
            sys.exit('--batch size must be positive')
        if os.path.exists(parsed.path):
            sys.exit('Cannot download to %s; file exists' % parsed.path)

        # etl.py's own arguments (self.etl_args) tell us which course was
        # requested; its context yields the namespace to read from.
        url_prefix = self.etl_args.course_url_prefix
        namespace_name = etl_lib.get_context(url_prefix).get_namespace_name()

        # Datastore reads must run inside the course's namespace because the
        # models are namespaced.
        with common_utils.Namespace(namespace_name):

            # Base query; extend it with filters as needed.
            base_query = models.Student.all()
            student_iter = common_utils.iter_all(base_query, parsed.batch_size)

            # Write each email followed by a newline.
            with open(parsed.path, 'w') as output_file:
                for record in student_iter:
                    output_file.write(record.email)
                    output_file.write('\n')
Ejemplo n.º 4
0
def _upload(upload_type, archive_path, course_url_prefix, force_overwrite):
    """Dispatch an upload of the requested type for one course.

    Looks up the course context for course_url_prefix and calls _die with an
    error message when none is found. Then runs _upload_course for
    _TYPE_COURSE or _upload_datastore for _TYPE_DATASTORE; any other
    upload_type does nothing.

    Args:
        upload_type: compared against _TYPE_COURSE and _TYPE_DATASTORE.
        archive_path: archive location, logged and forwarded to
            _upload_course.
        course_url_prefix: URL prefix used to look up the course context.
        force_overwrite: forwarded unchanged to _upload_course.
    """
    _LOG.info(
        "Processing course with URL prefix %s from archive path %s",
        course_url_prefix, archive_path)

    app_context = etl_lib.get_context(course_url_prefix)
    if not app_context:
        # NOTE(review): _die presumably terminates the process; confirm.
        _die("No course found with course_url_prefix %s" % course_url_prefix)

    if upload_type == _TYPE_COURSE:
        _upload_course(
            app_context, archive_path, course_url_prefix, force_overwrite)
        return
    if upload_type == _TYPE_DATASTORE:
        _upload_datastore()
Ejemplo n.º 5
0
def _upload(upload_type, archive_path, course_url_prefix):
    """Dispatch an upload of the requested type for one course.

    Looks up the course context for course_url_prefix and calls _die with an
    error message when none is found. Then runs _upload_course for
    _TYPE_COURSE or _upload_datastore for _TYPE_DATASTORE; any other
    upload_type does nothing.

    Args:
        upload_type: compared against _TYPE_COURSE and _TYPE_DATASTORE.
        archive_path: archive location, logged and forwarded to
            _upload_course.
        course_url_prefix: URL prefix used to look up the course context.
    """
    _LOG.info(
        'Processing course with URL prefix %s from archive path %s',
        course_url_prefix, archive_path)

    app_context = etl_lib.get_context(course_url_prefix)
    if not app_context:
        # NOTE(review): _die presumably terminates the process; confirm.
        _die('No course found with course_url_prefix %s' % course_url_prefix)

    if upload_type == _TYPE_COURSE:
        _upload_course(app_context, archive_path, course_url_prefix)
        return
    if upload_type == _TYPE_DATASTORE:
        _upload_datastore()
Ejemplo n.º 6
0
    def main(self):
        """Dump the content of every 'PA' unit of a course to its own file.

        For each unit of the requested course whose type is 'PA', the content
        returned by prog_assignment.ProgAssignmentBaseHandler.get_content is
        serialized with transforms.dumps and written to a file named
        <namespace>-problem-<unit_id> inside the directory self.args.path.

        Exits through sys.exit when batch_size is below 1 or when
        self.args.path is not a directory. batch_size is validated but not
        otherwise used here.
        """
        # By the time main() is invoked, arguments are parsed and available as
        # self.args. If you need more complicated argument validation than
        # argparse gives you, do it here:
        if self.args.batch_size < 1:
            sys.exit('--batch size must be positive')
        if not os.path.isdir(self.args.path):
            sys.exit('Cannot download to %s; Its not a directory' %
                     self.args.path)

        # Arguments passed to etl.py are also parsed and available as
        # self.etl_args. Here we use them to figure out the requested course's
        # namespace.
        namespace = etl_lib.get_context(
            self.etl_args.course_url_prefix).get_namespace_name()

        # Because our models are namespaced, we need to change to the requested
        # course's namespace before doing datastore reads or we won't find its
        # data. Get the current namespace so we can change back when we're done.
        old_namespace = namespace_manager.get_namespace()
        try:
            namespace_manager.set_namespace(namespace)

            app_context = sites.get_app_context_for_namespace(namespace)
            course = courses.Course(None, app_context=app_context)
            if not course:
                return

            for unit in course.get_units():
                if unit.type != 'PA':
                    continue
                content = prog_assignment.ProgAssignmentBaseHandler.get_content(
                    course, unit)
                output_path = os.path.join(
                    self.args.path,
                    namespace + '-problem-' + str(unit.unit_id))
                # Use a context manager so each file is flushed and closed
                # before the next unit is processed, even if the write fails.
                with open(output_path, 'w') as f:
                    f.write(transforms.dumps(content))

        finally:
            # The current namespace is global state. We must change it back to
            # the old value no matter what to prevent corrupting datastore
            # operations that run after us.
            namespace_manager.set_namespace(old_namespace)
Ejemplo n.º 7
0
    def main(self):
        """Dump every submission of a course into one file per unit.

        Iterates iter_all(student_work.Submission.all()) inside the requested
        course's namespace. Each submission is turned into a dict with the keys
        'unit_id', 'user' (the submission's key name) and 'code' (its
        contents), serialized with transforms.dumps and appended as one line to
        the file <namespace>-<unit_id> inside the directory self.args.path.
        Each submission's key name is also printed to stdout.

        Exits through sys.exit when batch_size is below 1 or when
        self.args.path is not a directory. batch_size is validated but not
        otherwise used here.
        """
        # By the time main() is invoked, arguments are parsed and available as
        # self.args. If you need more complicated argument validation than
        # argparse gives you, do it here:
        if self.args.batch_size < 1:
            sys.exit('--batch size must be positive')
        if not os.path.isdir(self.args.path):
            sys.exit('Cannot download to %s; Its not a directory' %
                     self.args.path)

        # Arguments passed to etl.py are also parsed and available as
        # self.etl_args. Here we use them to figure out the requested course's
        # namespace.
        namespace = etl_lib.get_context(
            self.etl_args.course_url_prefix).get_namespace_name()

        # Maps unit_id -> open output file, so each unit's file is opened once
        # and reused for all of that unit's submissions.
        file_dict = dict()
        # Because our models are namespaced, we need to change to the requested
        # course's namespace before doing datastore reads or we won't find its
        # data. Get the current namespace so we can change back when we're done.
        old_namespace = namespace_manager.get_namespace()
        try:
            namespace_manager.set_namespace(namespace)
            for sub in iter_all(student_work.Submission.all()):
                user = sub.key().name()
                # Parenthesised single-argument form prints the same text
                # whether print is a statement or a function.
                print(user)
                unit_id = sub.unit_id
                if unit_id not in file_dict:
                    file_dict[unit_id] = open(
                        os.path.join(
                            self.args.path, namespace + '-' + str(unit_id)),
                        'w')
                data = dict()
                data['unit_id'] = unit_id
                data['user'] = user
                data['code'] = sub.contents
                file_dict[unit_id].write(transforms.dumps(data) + '\n')
        finally:
            # The current namespace is global state. We must change it back to
            # the old value no matter what to prevent corrupting datastore
            # operations that run after us.
            namespace_manager.set_namespace(old_namespace)
            # Close every file we opened so buffered output is flushed and the
            # handles are released, even when the loop above failed midway.
            for output_file in file_dict.values():
                output_file.close()
Ejemplo n.º 8
0
    def main(self):
        """Upload the local file at self.args.path into the course filesystem.

        The destination is self.args.target joined onto
        appengine_config.BUNDLE_ROOT, and the write goes through the
        filesystem of the course context looked up from
        self.etl_args.course_url_prefix.

        Exits through sys.exit when the local path does not exist.
        """
        # self.args holds this job's parsed arguments; check by hand what
        # argparse cannot.
        local_path = self.args.path
        if not os.path.exists(local_path):
            sys.exit('%s does not exist' % local_path)

        # self.etl_args holds the arguments given to etl.py itself; the course
        # URL prefix identifies the course context to write into.
        course_context = etl_lib.get_context(self.etl_args.course_url_prefix)

        # Absolute destination path for the uploaded file.
        destination = os.path.join(
            appengine_config.BUNDLE_ROOT, self.args.target)

        # The write goes through the context's filesystem. A production tool
        # would likely add more safeguards here, such as refusing to overwrite
        # an existing file.
        with open(local_path) as source_file:
            course_context.fs.impl.put(
                destination, source_file, is_draft=False)
Ejemplo n.º 9
0
    def main(self):
        """Copy a local file into the requested course's filesystem.

        Reads the file at self.args.path and stores it, not as a draft, at
        self.args.target under appengine_config.BUNDLE_ROOT using the
        filesystem of the course context for self.etl_args.course_url_prefix.

        Exits through sys.exit when the local path does not exist.
        """
        # Hand-rolled validation of the already-parsed self.args, for checks
        # argparse does not provide.
        source_path = self.args.path
        if not os.path.exists(source_path):
            sys.exit('%s does not exist' % source_path)

        # etl.py's own arguments are on self.etl_args; they name the course
        # whose context we need.
        url_prefix = self.etl_args.course_url_prefix
        app_context = etl_lib.get_context(url_prefix)

        # Build the absolute path the file will be written to.
        target_path = os.path.join(
            appengine_config.BUNDLE_ROOT, self.args.target)

        # Write via the context's filesystem. Real programs would probably do
        # extra work first (for example, guarding against overwrites).
        with open(source_path) as local_file:
            app_context.fs.impl.put(target_path, local_file, is_draft=False)
Ejemplo n.º 10
0
    def _get_app_context_or_die(cls, course_url_prefix):
        """Return the course context for course_url_prefix.

        When etl_lib.get_context yields a falsy value, _die is called with an
        error message naming the prefix.
        """
        context = etl_lib.get_context(course_url_prefix)
        if context:
            return context

        # NOTE(review): _die presumably terminates the process; if it ever
        # returns, the falsy lookup result is handed back to the caller.
        _die('Unable to find course with url prefix ' + course_url_prefix)
        return context
Ejemplo n.º 11
0
    def _get_app_context_or_die(cls, course_url_prefix):
        """Look up the course context for a URL prefix, calling _die if absent.

        Returns whatever etl_lib.get_context produced for course_url_prefix;
        _die is invoked first when that value is falsy.
        """
        found_context = etl_lib.get_context(course_url_prefix)
        course_missing = not found_context
        if course_missing:
            # NOTE(review): _die presumably does not return; confirm.
            _die('Unable to find course with url prefix ' + course_url_prefix)
        return found_context