def process_course_run(self, course_run, output_file):
    """
    Write a single tab-separated CourseRecord line for one course run.

    Arguments:
        course_run: mapping describing the course run. The 'key' and 'course'
            entries are required; all other entries are read with ``.get()``.
        output_file: writable file-like object that receives the serialized
            record followed by a newline.
    """
    # One field instance is enough to parse every timestamp of the run.
    parse_datetime = DateTimeField().deserialize_from_string
    run_key = course_run['key']
    lookup = course_run.get

    record = CourseRecord(
        course_id=run_key,
        catalog_course=course_run['course'],
        catalog_course_title=lookup('title'),
        start_time=parse_datetime(lookup('start')),
        end_time=parse_datetime(lookup('end')),
        enrollment_start_time=parse_datetime(lookup('enrollment_start')),
        enrollment_end_time=parse_datetime(lookup('enrollment_end')),
        content_language=lookup('content_language'),
        pacing_type=lookup('pacing_type'),
        level_type=lookup('level_type'),
        availability=lookup('availability'),
        org_id=get_org_id_for_course(run_key),
        partner_short_code=lookup('partner_short_code'),
        marketing_url=lookup('marketing_url'),
        min_effort=lookup('min_effort'),
        max_effort=lookup('max_effort'),
        announcement_time=parse_datetime(lookup('announcement')),
        reporting_type=lookup('reporting_type'),
    )

    serialized = record.to_separated_values(sep=u'\t')
    output_file.write(serialized)
    output_file.write('\n')
class CourseSummaryEnrollmentRecord(Record):
    """Course metadata combined with recent enrollment counts for one enrollment mode."""

    # --- Course identity and catalog metadata ---
    course_id = StringField(
        description='A unique identifier of the course',
        length=255,
        nullable=False,
    )
    catalog_course_title = StringField(
        description='The name of the course',
        length=255,
        nullable=True,
        normalize_whitespace=True,
    )
    catalog_course = StringField(
        description='Course identifier without run',
        length=255,
        nullable=True,
    )
    start_time = DateTimeField(
        description='The date and time that the course begins',
        nullable=True,
    )
    end_time = DateTimeField(
        description='The date and time that the course ends',
        nullable=True,
    )
    pacing_type = StringField(
        description='The type of pacing for this course',
        length=255,
        nullable=True,
    )
    availability = StringField(
        description='Availability status of the course',
        length=255,
        nullable=True,
    )

    # --- Enrollment counts, broken down by mode ---
    enrollment_mode = StringField(
        description='Enrollment mode for the enrollment counts',
        length=100,
        nullable=False,
    )
    count = IntegerField(
        description='The count of currently enrolled learners',
        nullable=True,
    )
    count_change_7_days = IntegerField(
        description='Difference in enrollment counts over the past 7 days',
        nullable=True,
    )
    cumulative_count = IntegerField(
        description='The cumulative total of all users ever enrolled',
        nullable=True,
    )
Example #3
0
class ProblemResponseRecord(Record):
    """
    One learner's response to one question of a problem, within a given date range.

    A problem holding several questions produces one ProblemResponseRecord per question.

    Note that the course_id field is available from the partition string.
    """

    # ---- Fields populated from problem_response tracking logs ----
    course_id = StringField(
        description='Course containing the problem.',
    )
    answer_id = StringField(
        description='Learner\'s answer ID.',
    )
    problem_id = StringField(
        description='Problem\'s block usage ID.',
    )
    problem = StringField(
        description='Problem display name, at time of answering.',
    )
    username = StringField(
        description='Learner\'s username.',
    )
    question = StringField(
        description='Question\'s display name, at time of answering.',
    )
    score = FloatField(
        description='Score achieved by the learner.',
    )
    max_score = FloatField(
        description='Maximum possible score for the problem.',
    )
    correct = BooleanField(
        description='True if all answers are correct; '
                    'False if any answers are not correct; '
                    'None if any answers have unknown correctness.',
        nullable=True,
    )
    answer = DelimitedStringField(
        description='List of answers the user chose for the question.',
    )
    total_attempts = IntegerField(
        description='Total number of attempts the user has made on the problem.',
    )
    first_attempt_date = DateTimeField(
        description='date/time of the first attempt the user has made on the problem.',
    )
    last_attempt_date = DateTimeField(
        description='date/time of the last attempt the user has made on the problem.',
    )

    # ---- Fields populated from course_blocks ----
    location = StringField(
        description='Problem location in the course, concatenated from Section, Subsection, Unit, '
                    'and problem display name.  Sourced from course_blocks.course_path',
    )
    sort_idx = IntegerField(
        description='Sort index for the problem location.  Sourced from course_blocks.sort_idx',
    )
Example #4
0
class EnterpriseEnrollmentRecord(Record):
    """Describes one enterprise learner's enrollment in a course, with course and user details."""

    # --- Required identifiers ---
    enterprise_id = StringField(description='', nullable=False, length=32)
    enterprise_name = StringField(description='', nullable=False, length=255)
    lms_user_id = IntegerField(description='', nullable=False)
    enterprise_user_id = IntegerField(description='', nullable=False)
    course_id = StringField(
        description='The course the learner is enrolled in.',
        nullable=False,
        length=255,
    )

    # --- Enrollment state ---
    enrollment_created_timestamp = DateTimeField(description='', nullable=False)
    user_current_enrollment_mode = StringField(description='', nullable=False, length=32)
    consent_granted = BooleanField(description='')
    letter_grade = StringField(description='', length=32)
    has_passed = BooleanField(description='')
    passed_timestamp = DateTimeField(description='')

    # --- Enterprise-side identifiers ---
    enterprise_sso_uid = StringField(description='', length=255)
    enterprise_site_id = IntegerField(description='')

    # --- Course details ---
    course_title = StringField(description='', length=255)
    course_start = DateTimeField(description='')
    course_end = DateTimeField(description='')
    course_pacing_type = StringField(description='', length=32)
    course_duration_weeks = StringField(description='', length=32)
    course_min_effort = IntegerField(description='')
    course_max_effort = IntegerField(description='')

    # --- User account details ---
    user_account_creation_timestamp = DateTimeField(description='')
    user_email = StringField(description='', length=255)
    user_username = StringField(description='', length=255)
Example #5
0
class CourseActivityRecord(Record):
    """Count of users who performed a given category of activity in a course during one ISO week."""

    course_id = StringField(
        description='The course the learner is enrolled in.',
        nullable=False,
        length=255,
    )
    # The interval bounds delimit the ISO week the count applies to.
    interval_start = DateTimeField(
        description='Start time of ISO week.',
        nullable=False,
    )
    interval_end = DateTimeField(
        description='End time of ISO week.',
        nullable=False,
    )
    label = StringField(
        description='The name of activity user performed in the interval.',
        nullable=False,
        length=255,
    )
    count = IntegerField(
        description='Total count of activities performed between the interval.',
    )
Example #6
0
class AccountRecord(Record):
    """A single Google Analytics account."""

    account_id = IntegerField(
        nullable=False,
        description='Google Analytics Account ID',
    )
    account_name = StringField(
        nullable=False,
        length=200,
        description='Google Analytics Account Name',
    )
    # Creation / modification timestamps of the account.
    created = DateTimeField(
        nullable=False,
        description='Time the account was created.',
    )
    updated = DateTimeField(
        nullable=False,
        description='Time the account was last modified.',
    )
Example #7
0
class CourseRecord(Record):
    """
    Details of one course, as fetched from the edX Courses REST API.
    """

    # --- Identity ---
    course_id = StringField(
        description='Course identifier.',
        length=255,
        nullable=False,
    )
    name = StringField(
        description='Course name, truncated to 255 characters.',
        length=255,
        nullable=False,
        truncate=True,
        normalize_whitespace=True,
    )
    org = StringField(
        description='Course organization.',
        length=255,
        nullable=False,
    )
    number = StringField(
        description='Course number.',
        length=255,
        nullable=False,
    )
    blocks_url = StringField(
        description='URL of the course\'s blocks',
        nullable=False,
    )
    short_description = StringField(
        description='Short course description, truncated to 255 characters.',
        length=255,
        nullable=True,
        truncate=True,
        normalize_whitespace=True,
    )

    # --- Schedule ---
    enrollment_start = DateTimeField(
        description='Enrollment start date.',
        nullable=True,
    )
    enrollment_end = DateTimeField(
        description='Enrollment end date.',
        nullable=True,
    )
    start_date = DateTimeField(
        description='Course start date.',
        nullable=True,
    )
    end_date = DateTimeField(
        description='Course end date.',
        nullable=True,
    )
    start_display = StringField(
        description='Course start date description.',
        length=255,
        nullable=True,
        normalize_whitespace=True,
    )
    start_type = StringField(
        description='Indicates how start_display was set, e.g. "string", "timestamp", "empty".',
        length=255,
        nullable=True,
        normalize_whitespace=True,
    )

    # --- Effort and pacing ---
    effort = StringField(
        description='Description of effort required, truncated to 255 characters.',
        length=255,
        nullable=True,
        truncate=True,
        normalize_whitespace=True,
    )
    pacing = StringField(
        description='Description of course pacing strategy.',
        length=255,
        nullable=True,
        normalize_whitespace=True,
    )
Example #8
0
class GradesPersistentCourseGradeRecord(Record):
    """
    Record holding one persisted course grade for a user in a course.

    All fields are declared with default field options.
    """
    # NOTE(review): field names presumably mirror the columns of the LMS
    # persistent course grade table -- confirm against the source schema.

    # Row, learner and course identifiers.
    id = IntegerField()
    user_id = IntegerField()
    course_id = StringField()
    # Course content / grading policy state associated with the grade.
    course_edited_timestamp = DateTimeField()
    course_version = StringField()
    grading_policy_hash = StringField()
    # The grade itself.
    percent_grade = FloatField()
    letter_grade = StringField()
    passed_timestamp = DateTimeField()
    # Row bookkeeping timestamps.
    created = DateTimeField()
    modified = DateTimeField()
Example #9
0
    def run(self):
        """
        Convert the downloaded report file into PayPalCaseReportRecord lines.

        The first 3 lines of the input are dropped; the next line is consumed by
        csv.DictReader as the column header.  Every remaining row accepted by
        self.filtercsv_row is turned into one record and written to the output,
        one record per line.
        """
        with self.input().open('r') as input_file:
            # Drop the three leading lines so the header is the first line seen by the reader.
            report_lines = input_file.readlines()[3:]
            reader = csv.DictReader(report_lines, delimiter=',')
            date_time_field = DateTimeField()
            parse_datetime = date_time_field.deserialize_from_string
            to_decimal = self.amount_to_decimal

            with self.output().open('w') as output_file:
                for row in filter(self.filtercsv_row, reader):
                    # An empty due date is stored as a null instead of being parsed.
                    response_date = (
                        None
                        if row['Response due date'] == ''
                        else parse_datetime(row['Response due date'])
                    )

                    record = PayPalCaseReportRecord(
                        case_type=row['Case type'],
                        case_id=row['Case ID'],
                        original_transaction_id=row['Original transaction ID'],
                        transaction_date=parse_datetime(row['Transaction date']),
                        transaction_invoice_id=row['Transaction invoice ID'],
                        card_type=row['Card Type'],
                        case_reason=row['Case reason'],
                        claimant_name=row['Claimant name'],
                        claimant_email_address=row['Claimant email address'],
                        case_filing_date=parse_datetime(row['Case filing date']),
                        case_status=row['Case status'],
                        response_due_date=response_date,
                        disputed_amount=to_decimal(row['Disputed amount']),
                        disputed_currency=row['Disputed currency'],
                        disputed_transaction_id=row['Disputed transaction ID'],
                        money_movement=row['Money movement'],
                        settlement_type=row['Settlement type'],
                        seller_protection=row['Seller protection'],
                        seller_protection_payout_amount=to_decimal(row['Seller protection payout amount']),
                        seller_protection_currency=row['Seller protection currency'],
                        payment_tracking_id=row['Payment Tracking ID'],
                        buyer_comments=row['Buyer comments'],
                        store_id=row['Store ID'],
                        chargeback_reason_code=row['Chargeback Reason Code'],
                        outcome=row['Outcome'],
                        report_generation_date=DateField().deserialize_from_string(
                            self.run_date.strftime('%Y-%m-%d')
                        ),
                    )
                    serialized = record.to_separated_values()
                    output_file.write(serialized)
                    output_file.write('\n')
Example #10
0
 def run(self):
     """List accounts through the management API service and write one tab-separated AccountRecord per account."""
     self.remove_output_on_overwrite()
     service = self.create_management_api_service()
     with self.output().open('w') as output_file:
         listing = service.management().accounts().list().execute()
         parse_datetime = DateTimeField().deserialize_from_string
         # A response without an 'items' entry yields no records.
         for account in listing.get('items', []):
             record = AccountRecord(
                 account_id=int(account.get('id')),
                 account_name=account.get('name'),
                 created=parse_datetime(account.get('created')),
                 updated=parse_datetime(account.get('updated')),
             )
             line = record.to_separated_values(sep=u'\t')
             output_file.write(line)
             output_file.write('\n')
Example #11
0
 def run(self):
     """List views (profiles) across all accounts and properties and write one tab-separated ProfileRecord each."""
     self.remove_output_on_overwrite()
     service = self.create_management_api_service()
     with self.output().open('w') as output_file:
         # '~all' is passed for both the account and the web property filter.
         request = service.management().profiles().list(
             accountId='~all',
             webPropertyId='~all',
         )
         listing = request.execute()
         parse_datetime = DateTimeField().deserialize_from_string
         for profile in listing.get('items', []):
             record = ProfileRecord(
                 property_id=profile.get('webPropertyId'),
                 profile_id=int(profile.get('id')),
                 profile_name=profile.get('name'),
                 profile_type=profile.get('type'),
                 created=parse_datetime(profile.get('created')),
                 updated=parse_datetime(profile.get('updated')),
             )
             line = record.to_separated_values(sep=u'\t')
             output_file.write(line)
             output_file.write('\n')
class CourseSeatRecord(Record):
    """A single seat offered within a course run."""

    # Course run the seat belongs to, and the seat's type.
    course_id = StringField(length=255, nullable=False)
    course_seat_type = StringField(length=255, nullable=False)

    # Pricing.
    course_seat_price = FloatField(nullable=False)
    course_seat_currency = StringField(length=255, nullable=False)

    # Optional seat attributes.
    course_seat_upgrade_deadline = DateTimeField(nullable=True)
    course_seat_credit_provider = StringField(length=255, nullable=True)
    course_seat_credit_hours = IntegerField(nullable=True)
Example #13
0
class ProfileRecord(Record):
    """A single Google Analytics View (profile)."""

    property_id = StringField(
        nullable=False,
        length=20,
        description='Web property ID to which this view (profile) belongs.',
    )
    profile_id = IntegerField(
        nullable=False,
        description='View (Profile) ID.',
    )
    profile_name = StringField(
        nullable=False,
        length=200,
        description='Name of this view (profile).',
    )
    profile_type = StringField(
        nullable=False,
        length=10,
        description='View (Profile) type. WEB or APP.',
    )
    # Creation / modification timestamps of the view.
    created = DateTimeField(
        nullable=False,
        description='Time this view (profile) was created.',
    )
    updated = DateTimeField(
        nullable=False,
        description='Time this view (profile) was last modified.',
    )
class CourseRecord(Record):
    """A course run together with its catalog metadata."""

    # --- Identity ---
    course_id = StringField(length=255, nullable=False)
    catalog_course = StringField(length=255, nullable=False)
    catalog_course_title = StringField(
        length=255,
        nullable=True,
        normalize_whitespace=True,
    )

    # --- Schedule ---
    start_time = DateTimeField(nullable=True)
    end_time = DateTimeField(nullable=True)
    enrollment_start_time = DateTimeField(nullable=True)
    enrollment_end_time = DateTimeField(nullable=True)

    # --- Descriptive attributes ---
    content_language = StringField(length=50, nullable=True)
    pacing_type = StringField(length=255, nullable=True)
    level_type = StringField(length=255, nullable=True)
    availability = StringField(length=255, nullable=True)
    org_id = StringField(length=255, nullable=False)
    partner_short_code = StringField(length=8, nullable=True)
    marketing_url = StringField(length=1024, nullable=True)

    # --- Effort bounds ---
    min_effort = IntegerField(nullable=True)
    max_effort = IntegerField(nullable=True)
class EnterpriseUserRecord(Record):
    """Summary of a single enterprise user."""

    # Required identifiers.
    enterprise_id = StringField(description='', nullable=False, length=32)
    lms_user_id = IntegerField(description='', nullable=False)
    enterprise_user_id = IntegerField(description='', nullable=False)

    # Remaining attributes use the default nullability.
    enterprise_sso_uid = StringField(description='', length=255)
    user_account_creation_timestamp = DateTimeField(description='')
    user_email = StringField(description='', length=255)
    user_username = StringField(description='', length=255)
    user_country_code = StringField(description='', length=2)
    last_activity_date = DateField(description='')
Example #16
0
class PropertyRecord(Record):
    """A single Google Analytics Web Property."""

    account_id = IntegerField(
        nullable=False,
        description='Account ID to which this web property belongs.',
    )
    property_id = StringField(
        nullable=False,
        length=20,
        description='Web property ID of the form UA-XXXXX-YY.',
    )
    property_name = StringField(
        nullable=False,
        length=200,
        description='Name of this web property.',
    )
    # The website URL is the only nullable field of the record.
    website_url = StringField(
        nullable=True,
        length=255,
        description='Website url for this web property.',
    )
    created = DateTimeField(
        nullable=False,
        description='Time this web property was created.',
    )
    updated = DateTimeField(
        nullable=False,
        description='Time this web property was last modified.',
    )
 def process_course_run(self, course_run, output_file):
     """
     Write one tab-separated CourseSeatRecord line per seat of the course run.

     Arguments:
         course_run: mapping describing the course run; 'key' is read for every
             seat, and a missing 'seats' entry produces no output.
         output_file: writable file-like object that receives the records.
     """
     seats = course_run.get('seats', [])
     for seat in seats:
         upgrade_deadline = seat.get('upgrade_deadline')
         record = CourseSeatRecord(
             course_id=course_run['key'],
             course_seat_type=seat['type'],
             # A seat without a 'price' entry is recorded with a price of 0.
             course_seat_price=float(seat.get('price', 0)),
             course_seat_currency=seat.get('currency'),
             course_seat_upgrade_deadline=DateTimeField().deserialize_from_string(upgrade_deadline),
             course_seat_credit_provider=seat.get('credit_provider'),
             course_seat_credit_hours=seat.get('credit_hours'),
         )
         line = record.to_separated_values(sep=u'\t')
         output_file.write(line)
         output_file.write('\n')
class EnrollmentSummaryRecord(Record):
    """A user's enrollment history for one course, condensed into a single record."""

    # --- Who and where ---
    course_id = StringField(
        description='Course the learner enrolled in.',
        nullable=False,
        length=255,
    )
    user_id = IntegerField(
        description='The user\'s numeric identifier.',
        nullable=False,
    )

    # --- Current state ---
    current_enrollment_mode = StringField(
        description='The last mode seen on an activation or mode change event.',
        nullable=False,
        length=100,
    )
    current_enrollment_is_active = BooleanField(
        description='True if the user is currently enrolled as of the end of the interval.',
        nullable=False,
    )

    # --- Enrollment history milestones ---
    first_enrollment_mode = StringField(
        description='The mode the user first enrolled with.',
        nullable=True,
        length=100,
    )
    first_enrollment_time = DateTimeField(
        description='The time of the user\'s first enrollment.',
        nullable=True,
    )
    last_unenrollment_time = DateTimeField(
        description='The time of the user\'s last unenrollment.',
        nullable=True,
    )
    first_verified_enrollment_time = DateTimeField(
        description='The time the user first switched to the verified track.',
        nullable=True,
    )
    first_credit_enrollment_time = DateTimeField(
        description='The time the user first switched to the credit track.',
        nullable=True,
    )

    # --- Analysis window ---
    end_time = DateTimeField(
        description='The end of the interval that was analyzed.',
        nullable=False,
    )
 def test_serialize_to_string(self, date, expected):
     """Check that DateTimeField serializes `date` to the `expected` string."""
     field = DateTimeField()
     serialized = field.serialize_to_string(date)
     self.assertEqual(serialized, expected)
Example #20
0
    def reducer(self, key, values):
        """
        Build ProblemResponseRecords from a learner's most recent response to a problem.

        When the latest response holds several answers (as returned by
        self.get_answer_data), one record is produced for each of them.

        Args:
            key:  (course_id, problem_id, username)
            values:  iterator of (attempt_date, problem_check_json)

        Yields:
            The to_string_tuple() form of one ProblemResponseRecord per answer
            in the latest response.  Every record carries the same aggregated
            total_attempts, first_attempt_date and last_attempt_date; location
            is set to '' and sort_idx to 0.

            Nothing is yielded when `values` is empty.
        """
        course_id, problem_id, username = key

        # Sorting the (date, payload) tuples puts the earliest attempt first and
        # the latest one last.  This relies on the date strings being in ISO
        # representation, so that string order equals chronological order.
        attempts = sorted(values)
        if not attempts:
            return

        earliest_date, _earliest_response = attempts[0]
        latest_date, latest_response = attempts[-1]
        attempt_count = len(attempts)

        date_time_field = DateTimeField()
        parse_datetime = date_time_field.deserialize_from_string

        # Only the latest response contributes answers; the earlier attempts
        # only feed the aggregate values.
        for submission in self.get_answer_data(latest_response):
            record = ProblemResponseRecord(
                course_id=course_id,
                answer_id=submission.get('answer_id'),
                problem_id=problem_id,
                problem=submission.get('problem', ''),
                username=username,
                question=submission.get('question', ''),
                score=submission.get('grade', 0),
                max_score=submission.get('max_grade', 0),
                correct=submission.get('correct', None),
                answer=submission.get('answer', ()),
                total_attempts=attempt_count,
                first_attempt_date=parse_datetime(earliest_date),
                last_attempt_date=parse_datetime(latest_date),
                location='',
                sort_idx=0,
            )
            yield record.to_string_tuple()
    def create_input_output(self, attempts):
        """
        Build reducer inputs for the given attempts together with the expected output.

        Returns a tuple (inputs, expected_outputs): `inputs` is a shuffled list of
        (time, json_event) pairs, one per attempt, and `expected_outputs` is a
        one-element tuple holding the string tuple of the expected
        ProblemResponseRecord for the latest attempt.
        """
        reducer_inputs = []

        # Running summary, updated while walking through the attempts.
        attempt_count = 0
        earliest_time = None
        latest_attempt = {}

        for idx, attempt in enumerate(attempts):
            answer_id = 'answer_{}'.format(idx)
            time = '2013-01-01 00:{0:02d}:00.0'.format(idx)

            # Translate the tri-state 'correct' flag into the event's correctness label.
            is_correct = attempt['correct']
            if is_correct is None:
                correctness = 'unknown'
            else:
                correctness = 'correct' if is_correct else 'incorrect'

            # Event payload for this attempt.
            problem_data = {
                'username': self.USERNAME,
                'context': {
                    'course_id': self.course_id,
                    'module': {'display_name': self.problem},
                },
                'problem_id': self.problem_id,
                'attempts': idx + 1,
                'submission': {
                    answer_id: {'question': self.question},
                },
                'answers': {answer_id: attempt['answer']},
                'correct_map': {
                    answer_id: {'correctness': correctness},
                },
                'grade': 0,
                'max_grade': 1,
                'time': time,
            }
            reducer_inputs.append((time, json.dumps(problem_data)))

            # Fold this attempt into the running summary.
            attempt_count += 1
            if not earliest_time or time < earliest_time:
                earliest_time = time
            if not latest_attempt or time > latest_attempt['last_attempt_date']:
                latest_attempt = attempt.copy()
                latest_attempt.update(answer_id=answer_id, last_attempt_date=time)

        # The expected record describes the latest attempt plus the aggregates.
        date_field = DateTimeField()
        parse_datetime = date_field.deserialize_from_string
        expected_answer = latest_attempt.get('expected_answer', latest_attempt.get('answer'))
        expected = ProblemResponseRecord(
            course_id=self.course_id,
            answer_id=latest_attempt['answer_id'],
            username=self.USERNAME,
            problem_id=self.problem_id,
            problem=self.problem,
            question=self.question,
            score=0,
            max_score=1,
            correct=latest_attempt['correct'],
            answer=expected_answer,
            total_attempts=attempt_count,
            first_attempt_date=parse_datetime(earliest_time),
            last_attempt_date=parse_datetime(latest_attempt['last_attempt_date']),
            location='',
            sort_idx=0,
        )

        # Input order must not matter to the reducer, so hand the inputs over shuffled.
        random.shuffle(reducer_inputs)
        expected_outputs = (expected.to_string_tuple(),)
        return (reducer_inputs, expected_outputs)
Example #22
0
 def test_validate_error(self, value, expected_error):
     """Check that validating `value` with a DateTimeField reports exactly `expected_error`."""
     field = DateTimeField()
     validation_errors = field.validate(value)
     # Exactly one error is expected, and it has to be the given one.
     self.assertEqual(len(validation_errors), 1)
     only_error = validation_errors[0]
     self.assertEqual(only_error, expected_error)
Example #23
0
class PayPalCaseReportRecord(Record):
    """
    One row of a PayPal Case Report.

    A case is filed against a transaction (per the field descriptions: chargeback,
    dispute, claim, ...).  Every field is declared nullable except
    ``report_generation_date``.

    NOTE(review): the declaration order below presumably defines the column order
    of the serialized record -- confirm before reordering any field.
    """

    # --- Case identity and the transaction it was filed against ---
    case_type = StringField(
        length=50, nullable=True,
        description='Type of case made against the transaction for e.g. Chargeback, Dispute, Claim etc'
    )
    case_id = StringField(
        length=18, nullable=True,
        description='PayPal generated unique ID for Case'
    )
    original_transaction_id = StringField(
        length=255, nullable=True,
        description='PayPal generated ID of transaction against which Case was filed'
    )
    transaction_date = DateTimeField(
        nullable=True,
        description='Completion date of the transaction'
    )
    transaction_invoice_id = StringField(
        length=255, nullable=True,
        description='Invoice ID provided with transaction'
    )
    card_type = StringField(
        length=255, nullable=True,
        description='Credit Card type used for the transaction'
    )

    # --- Who filed the case, why, and its lifecycle dates ---
    case_reason = StringField(
        length=255, nullable=True,
        description='Systematic reason for the case like Inquiry by PayPal, Item not received, Not as described etc'
    )
    claimant_name = StringField(
        length=128, nullable=True,
        description='Name of the claimant'
    )
    claimant_email_address = StringField(
        length=128, nullable=True,
        description='PayPal email address of the buyer'
    )
    case_filing_date = DateTimeField(
        nullable=True,
        description='Date that the case was originally filed with PayPal'
    )
    case_status = StringField(
        length=255, nullable=True,
        description='State or the status of case'
    )
    response_due_date = DateTimeField(
        nullable=True,
        description='Date by which filed case should be responded'
    )

    # --- Disputed amount and money movement ---
    # NOTE(review): the amount is declared as an IntegerField; confirm the report
    # never carries fractional amounts in this column.
    disputed_amount = IntegerField(
        nullable=True,
        description='Amount being disputed by the buyer in the original transaction'
    )
    disputed_currency = StringField(
        length=255, nullable=True,
        description='Currency of the disputed amount'
    )
    disputed_transaction_id = StringField(
        length=255, nullable=True,
        description='Transaction ID generated at the time of the money movement event'
    )
    money_movement = StringField(
        length=255, nullable=True,
        description='PayPal amount status like Credit, Debit, On temporary hold etc'
    )
    settlement_type = StringField(
        length=255, nullable=True,
        description='Mode to return money to buyer'
    )

    # --- Seller protection ---
    seller_protection = StringField(
        length=255, nullable=True,
        description='Specifies the Seller Protection status'
    )
    # NOTE(review): integer payout amount, same caveat as disputed_amount above.
    seller_protection_payout_amount = IntegerField(
        nullable=True,
        description='Amount PayPal paid on the behalf of the seller to the buyer as a result of the Seller Protection coverage'
    )
    seller_protection_currency = StringField(
        length=255, nullable=True,
        description='Seller protection currency'
    )

    # --- Miscellaneous case details ---
    payment_tracking_id = StringField(
        length=255, nullable=True,
        description='Unique ID to obtain information about payment or refund'
    )
    buyer_comments = StringField(
        length=500, nullable=True,
        description='Comments by buyer'
    )
    store_id = StringField(
        length=255, nullable=True,
        description='Merchant identifier of the store where the purchase occurred'
    )
    chargeback_reason_code = StringField(
        length=32, nullable=True,
        description='Unique identifier to distinguish chargeback nature / reason'
    )
    outcome = StringField(
        length=3000, nullable=True,
        description='Outcome'
    )

    # The only non-nullable field in this record.
    # NOTE(review): a ``length`` is passed to DateField here, unlike the
    # DateTimeField declarations above -- confirm DateField actually uses it.
    report_generation_date = DateField(
        length=10, nullable=False,
        description='Report file generation date'
    )
 def format_timestamp(event):
     """Return the event's timestamp deserialized by DateTimeField.

     Returns None when ``event`` is None or when its ``timestamp`` is None.
     """
     # NOTE(review): there is no ``self`` parameter, yet the reducer invokes this as
     # ``self.format_timestamp(...)`` -- presumably a @staticmethod whose decorator
     # is outside this view; confirm.
     has_timestamp = event is not None and event.timestamp is not None
     if has_timestamp:
         return DateTimeField().deserialize_from_string(event.timestamp)
     return None
    def reducer(self, key, values):
        """Collapse one user's enrollment events for one course into a summary record.

        Arguments:
            key: a ``(course_id, user_id)`` pair.
            values: iterable of ``(timestamp, event_type, mode)`` tuples.

        The events are replayed in sorted order through a two-state machine
        (ENROLLED / UNENROLLED).  Events that are not valid for the current state
        only increment a diagnostic counter.  Yields a single string tuple, or
        nothing at all when no valid activation was ever seen.
        """
        course_id, user_id = key
        counter_group = 'Enrollment State'

        events = [
            EnrollmentEvent(stamp, raw_type, raw_mode)
            for stamp, raw_type, raw_mode in sorted(values)
        ]

        state = UNENROLLED
        most_recent_mode = None
        first_enroll_event = None
        last_unenroll_event = None
        first_event_by_mode = {}

        for event in events:
            kind = event.event_type

            if state == ENROLLED and kind == DEACTIVATED:
                # A deactivation only counts while enrolled, so of several deactivations
                # in a row only the first becomes the "last unenrollment".
                state = UNENROLLED
                last_unenroll_event = event
                if event.mode != most_recent_mode:
                    self.incr_counter(counter_group, 'Deactivation Mode Changed', 1)
            elif (state == UNENROLLED and kind == ACTIVATED) or (state == ENROLLED and kind == MODE_CHANGED):
                if kind == ACTIVATED:
                    # Of several activations in a row, only the first one reaches this
                    # branch; the very first of all is remembered as the first enrollment.
                    state = ENROLLED
                    if first_enroll_event is None:
                        first_enroll_event = event

                if kind == MODE_CHANGED and event.mode == most_recent_mode:
                    self.incr_counter(counter_group, 'Redundant Mode Change', 1)

                # The current mode is tracked from activations and mode changes only; a
                # differing mode on a deactivation is ignored.  Which event to trust is
                # unclear in many such cases, so this choice is fairly arbitrary.
                most_recent_mode = event.mode
                first_event_by_mode.setdefault(most_recent_mode, event)
            else:
                # The event is invalid for the current state: just count it.
                invalid_counter = None
                if state == ENROLLED and kind == ACTIVATED:
                    if event.mode == most_recent_mode:
                        invalid_counter = 'Enrolled Activation'
                    else:
                        invalid_counter = 'Enrolled Activation Mode Changed'
                elif state == UNENROLLED:
                    if kind == DEACTIVATED:
                        invalid_counter = 'Unenrolled Deactivation'
                    elif kind == MODE_CHANGED:
                        invalid_counter = 'Unenrolled Mode Change'

                if invalid_counter is not None:
                    self.incr_counter(counter_group, invalid_counter, 1)

        if first_enroll_event is None:
            # Only deactivations and mode changes were seen for this user, which is
            # odd; discard the record.
            self.incr_counter(counter_group, 'Missing Enrollment Event', 1)
            return

        summary = EnrollmentSummaryRecord(
            course_id=course_id,
            user_id=int(user_id),
            current_enrollment_mode=most_recent_mode,
            current_enrollment_is_active=(state == ENROLLED),
            first_enrollment_mode=first_enroll_event.mode,
            first_enrollment_time=self.format_timestamp(first_enroll_event),
            last_unenrollment_time=self.format_timestamp(last_unenroll_event),
            first_verified_enrollment_time=self.format_timestamp(first_event_by_mode.get('verified')),
            first_credit_enrollment_time=self.format_timestamp(first_event_by_mode.get('credit')),
            end_time=DateTimeField().deserialize_from_string(self.interval.date_b.isoformat()),
        )
        yield summary.to_string_tuple()
Example #26
0
    def create_input_output(self, attempts):
        """Build shuffled problem-attempt inputs and the single expected output row.

        Each attempt is a dict read for its ``'correct'`` and ``'answer'`` keys (and,
        for the expected answer, optionally ``'expected_answer'``).  Attempt ``idx``
        is stamped with minute ``idx`` of 2013-01-01.

        Returns a pair ``(inputs, (expected_row,))``: ``inputs`` is a randomly
        ordered list of ``(time, json_string)`` tuples and ``expected_row`` is the
        string tuple of the record summarizing the latest attempt.
        """
        inputs = []

        # Running summary, updated while walking the attempts.
        total_attempts = 0
        first_attempt_date = None
        latest_attempt = {}

        for idx, attempt in enumerate(attempts):
            answer_id = 'answer_{}'.format(idx)
            attempt_time = '2013-01-01 00:{0:02d}:00.0'.format(idx)

            # Tri-state correctness: None -> unknown, truthy -> correct, falsy -> incorrect.
            outcome = attempt['correct']
            if outcome is None:
                correctness = 'unknown'
            else:
                correctness = 'correct' if outcome else 'incorrect'

            # Serialize this attempt as one input event.
            problem_data = {
                'username': self.USERNAME,
                'context': {
                    'course_id': self.course_id,
                    'module': {'display_name': self.problem},
                },
                'problem_id': self.problem_id,
                'attempts': idx + 1,
                'submission': {answer_id: {'question': self.question}},
                'answers': {answer_id: attempt['answer']},
                'correct_map': {answer_id: {'correctness': correctness}},
                'grade': 0,
                'max_grade': 1,
                'time': attempt_time,
            }
            inputs.append((attempt_time, json.dumps(problem_data)))

            # Fold this attempt into the summary, remembering the latest one seen.
            total_attempts += 1
            if not first_attempt_date or attempt_time < first_attempt_date:
                first_attempt_date = attempt_time
            if not latest_attempt or attempt_time > latest_attempt['last_attempt_date']:
                latest_attempt = attempt.copy()
                latest_attempt['answer_id'] = answer_id
                latest_attempt['last_attempt_date'] = attempt_time

        # The expected record describes the latest attempt plus the running totals.
        parse_date = DateTimeField().deserialize_from_string
        expected = ProblemResponseRecord(
            course_id=self.course_id,
            answer_id=latest_attempt['answer_id'],
            username=self.USERNAME,
            problem_id=self.problem_id,
            problem=self.problem,
            question=self.question,
            score=0,
            max_score=1,
            correct=latest_attempt['correct'],
            answer=latest_attempt.get('expected_answer', latest_attempt.get('answer')),
            total_attempts=total_attempts,
            first_attempt_date=parse_date(first_attempt_date),
            last_attempt_date=parse_date(latest_attempt['last_attempt_date']),
            location='',
            sort_idx=0,
        )

        # Input order should not matter to the code under test, so scramble it.
        random.shuffle(inputs)
        return (inputs, (expected.to_string_tuple(), ))
 def test_deserialize_from_string(self, string, expected):
     """Deserializing ``string`` with a DateTimeField must produce ``expected``."""
     actual = DateTimeField().deserialize_from_string(string)
     self.assertEqual(actual, expected)
Example #28
0
 def test_validate_success(self, value):
     """A valid ``value`` must produce no validation errors from a DateTimeField."""
     validation_errors = DateTimeField().validate(value)
     self.assertEqual(len(validation_errors), 0)
 def test_validate_success(self, value):
     """Check that DateTimeField.validate reports zero errors for ``value``."""
     problems = DateTimeField().validate(value)
     error_count = len(problems)
     self.assertEqual(error_count, 0)
class SampleElasticSearchStruct(Record):
    """
    A record with a variety of field types to illustrate all elasticsearch properties.

    Declares one field each of the string, integer, date and datetime field types.
    """
    name = StringField()
    # The only field in this record declared with ``analyzed=True``.
    index = IntegerField(analyzed=True)
    date = DateField()
    dateTime = DateTimeField()
 def test_validate_error(self, value, expected_error):
     """An invalid ``value`` must yield a single error equal to ``expected_error``."""
     reported = DateTimeField().validate(value)
     self.assertEqual(len(reported), 1)
     first_error = reported[0]
     self.assertEqual(first_error, expected_error)
 def setUp(self):
     """Create the ``DateTimeField.TzUtc`` instance and a reference datetime for each test."""
     super(DateTimeFieldTzUtcTest, self).setUp()
     self.utc_tz = DateTimeField.TzUtc()
     # datetime.now() without a tz argument returns a naive local datetime.
     self.now = datetime.datetime.now()