def run(self):
    """
    Read the report file and write its body rows out as case report records.

    The first 3 lines of the input carry information about the file source
    and header, so they are skipped; the 4th line is then used by
    ``csv.DictReader`` as the column header.  Rows rejected by
    ``self.filtercsv_row`` are dropped (per the report docs only the
    "Section Body Data" is meant to be saved), and every remaining row is
    written to the output as one ``PayPalCaseReportRecord`` line.

    An empty 'Response due date' is stored as ``None``; the other date
    columns ('Transaction date', 'Case filing date') are always deserialized.
    """
    date_time_field = DateTimeField()
    # The report generation date is identical for every row, so deserialize
    # it once here instead of building a new DateField for each row.  Doing it
    # before any file is opened also means a bad run date fails early.
    report_generation_date = DateField().deserialize_from_string(self.run_date.strftime('%Y-%m-%d'))

    with self.input().open('r') as input_file:
        # Drop the 3 preamble lines; the next line is the CSV header.
        body_lines = input_file.readlines()[3:]
        reader = csv.DictReader(body_lines, delimiter=',')
        with self.output().open('w') as output_file:
            for row in filter(self.filtercsv_row, reader):
                due_date = row['Response due date']
                # The due date is the only date column treated as optional.
                response_date = date_time_field.deserialize_from_string(due_date) if due_date != '' else None
                record = PayPalCaseReportRecord(
                    case_type=row['Case type'],
                    case_id=row['Case ID'],
                    original_transaction_id=row['Original transaction ID'],
                    transaction_date=date_time_field.deserialize_from_string(row['Transaction date']),
                    transaction_invoice_id=row['Transaction invoice ID'],
                    card_type=row['Card Type'],
                    case_reason=row['Case reason'],
                    claimant_name=row['Claimant name'],
                    claimant_email_address=row['Claimant email address'],
                    case_filing_date=date_time_field.deserialize_from_string(row['Case filing date']),
                    case_status=row['Case status'],
                    response_due_date=response_date,
                    disputed_amount=self.amount_to_decimal(row['Disputed amount']),
                    disputed_currency=row['Disputed currency'],
                    disputed_transaction_id=row['Disputed transaction ID'],
                    money_movement=row['Money movement'],
                    settlement_type=row['Settlement type'],
                    seller_protection=row['Seller protection'],
                    seller_protection_payout_amount=self.amount_to_decimal(row['Seller protection payout amount']),
                    seller_protection_currency=row['Seller protection currency'],
                    payment_tracking_id=row['Payment Tracking ID'],
                    buyer_comments=row['Buyer comments'],
                    store_id=row['Store ID'],
                    chargeback_reason_code=row['Chargeback Reason Code'],
                    outcome=row['Outcome'],
                    report_generation_date=report_generation_date,
                )
                output_file.write(record.to_separated_values())
                output_file.write('\n')
def create_input_output(self, attempts):
    """
    Build the reducer inputs for ``attempts`` and the output expected from them.

    Args:
        attempts: iterable of dicts.  Each must have 'correct' (None, truthy
            or falsy) and 'answer' keys; an optional 'expected_answer' key
            overrides 'answer' in the expected record.

    Returns:
        ``(inputs, expected)`` where ``inputs`` is a shuffled list of
        ``(time, problem_check_json)`` tuples, one per attempt, and
        ``expected`` is a 1-tuple holding the string tuple of the
        ProblemResponseRecord built from the attempt with the latest time.
    """
    inputs = []
    attempt_count = 0
    earliest_time = None
    newest = {}

    for idx, attempt in enumerate(attempts):
        answer_id = 'answer_{}'.format(idx)
        # Each attempt gets a timestamp derived from its position.
        timestamp = '2013-01-01 00:{0:02d}:00.0'.format(idx)

        was_correct = attempt['correct']
        if was_correct is None:
            correctness = 'unknown'
        else:
            correctness = 'correct' if was_correct else 'incorrect'

        # Key order is kept as-is so the serialized JSON is unchanged.
        problem_data = {
            'username': self.USERNAME,
            'context': {
                'course_id': self.course_id,
                'module': {
                    'display_name': self.problem,
                },
            },
            'problem_id': self.problem_id,
            'attempts': idx + 1,
            'submission': {
                answer_id: {
                    'question': self.question,
                }
            },
            'answers': {
                answer_id: attempt['answer'],
            },
            'correct_map': {
                answer_id: {
                    'correctness': correctness,
                }
            },
            'grade': 0,
            'max_grade': 1,
            'time': timestamp,
        }
        inputs.append((timestamp, json.dumps(problem_data)))

        # Maintain the running summary: count, earliest time, latest attempt.
        attempt_count += 1
        if not earliest_time or timestamp < earliest_time:
            earliest_time = timestamp
        if not newest or timestamp > newest['last_attempt_date']:
            newest = attempt.copy()
            newest['answer_id'] = answer_id
            newest['last_attempt_date'] = timestamp

    # The expected record describes the latest attempt plus the aggregates.
    date_field = DateTimeField()
    expected = ProblemResponseRecord(
        course_id=self.course_id,
        answer_id=newest['answer_id'],
        username=self.USERNAME,
        problem_id=self.problem_id,
        problem=self.problem,
        question=self.question,
        score=0,
        max_score=1,
        correct=newest['correct'],
        answer=newest.get('expected_answer', newest.get('answer')),
        total_attempts=attempt_count,
        first_attempt_date=date_field.deserialize_from_string(earliest_time),
        last_attempt_date=date_field.deserialize_from_string(newest['last_attempt_date']),
        location='',
        sort_idx=0,
    )

    # Input order should not matter, so hand the inputs back shuffled.
    random.shuffle(inputs)
    return (inputs, (expected.to_string_tuple(),))
def reducer(self, key, values):
    """
    Emit ProblemResponseRecords for a user's most recent response to a problem.

    The attempts are sorted so that the first and last entries give the
    earliest and latest attempt.  This relies on the attempt dates being
    strings that sort chronologically (e.g. ISO format).  One record is
    produced per entry returned by ``self.get_answer_data`` for the latest
    response, so a response containing several submissions (questions) yields
    several records.

    Args:
        key: (course_id, problem_id, username)
        values: iterable of (attempt_date, problem_check_json) pairs.

    Yields:
        ``ProblemResponseRecord.to_string_tuple()`` for each answer of the
        latest attempt, annotated with total_attempts, first_attempt_date and
        last_attempt_date.  Nothing is yielded when ``values`` is empty.
    """
    course_id, problem_id, username = key

    # Ascending order by (attempt_date, json): earliest first, latest last.
    attempts = sorted(values)
    if len(attempts) == 0:
        return

    earliest_date, _ = attempts[0]
    latest_date, latest_payload = attempts[-1]
    attempt_count = len(attempts)

    date_time_field = DateTimeField()
    for answer in self.get_answer_data(latest_payload):
        record_fields = dict(
            course_id=course_id,
            answer_id=answer.get('answer_id'),
            problem_id=problem_id,
            problem=answer.get('problem', ''),
            username=username,
            question=answer.get('question', ''),
            score=answer.get('grade', 0),
            max_score=answer.get('max_grade', 0),
            correct=answer.get('correct'),
            answer=answer.get('answer', ()),
            total_attempts=attempt_count,
            first_attempt_date=date_time_field.deserialize_from_string(earliest_date),
            last_attempt_date=date_time_field.deserialize_from_string(latest_date),
            location='',
            sort_idx=0,
        )
        yield ProblemResponseRecord(**record_fields).to_string_tuple()
def create_input_output(self, attempts):
    """
    Turn a list of attempts into shuffled reducer inputs and the expected output.

    Args:
        attempts: iterable of dicts with 'correct' and 'answer' keys, and
            optionally 'expected_answer' (used in place of 'answer' when
            building the expected record).

    Returns:
        A 2-tuple.  The first item is a list of ``(time, json_string)``
        tuples, one per attempt, in random order.  The second item is a
        1-tuple containing ``to_string_tuple()`` of the ProblemResponseRecord
        expected for the attempt with the latest time.
    """
    def describe(correct):
        # Map the tri-state 'correct' flag onto the correct_map label.
        if correct is None:
            return 'unknown'
        return 'correct' if correct else 'incorrect'

    inputs = []
    total_attempts = 0
    first_seen = None
    last_seen = {}

    for position, attempt in enumerate(attempts):
        answer_id = 'answer_{}'.format(position)
        when = '2013-01-01 00:{0:02d}:00.0'.format(position)

        # Nested pieces are built first; the top-level key order below is
        # the order json.dumps will serialize.
        context = {
            'course_id': self.course_id,
            'module': {
                'display_name': self.problem,
            },
        }
        submission = {answer_id: {'question': self.question}}
        answers = {answer_id: attempt['answer']}
        correct_map = {answer_id: {'correctness': describe(attempt['correct'])}}
        problem_data = {
            'username': self.USERNAME,
            'context': context,
            'problem_id': self.problem_id,
            'attempts': position + 1,
            'submission': submission,
            'answers': answers,
            'correct_map': correct_map,
            'grade': 0,
            'max_grade': 1,
            'time': when,
        }
        inputs.append((when, json.dumps(problem_data)))

        # Track the aggregates the reducer is expected to compute.
        total_attempts += 1
        if not first_seen or when < first_seen:
            first_seen = when
        if not last_seen or when > last_seen['last_attempt_date']:
            last_seen = attempt.copy()
            last_seen.update(answer_id=answer_id, last_attempt_date=when)

    # Build the record expected for the latest attempt.
    date_field = DateTimeField()
    first_attempt_date = date_field.deserialize_from_string(first_seen)
    last_attempt_date = date_field.deserialize_from_string(last_seen['last_attempt_date'])
    expected = ProblemResponseRecord(
        course_id=self.course_id,
        answer_id=last_seen['answer_id'],
        username=self.USERNAME,
        problem_id=self.problem_id,
        problem=self.problem,
        question=self.question,
        score=0,
        max_score=1,
        correct=last_seen['correct'],
        answer=last_seen.get('expected_answer', last_seen.get('answer')),
        total_attempts=total_attempts,
        first_attempt_date=first_attempt_date,
        last_attempt_date=last_attempt_date,
        location='',
        sort_idx=0,
    )

    # Shuffle because the order of the inputs shouldn't matter.
    random.shuffle(inputs)
    return (inputs, (expected.to_string_tuple(), ))