Example #1
    def test_end_to_end(self):
        self.task.launch([
            'BuildFinancialReportsTask',
            '--import-date', self.UPPER_BOUND_DATE,
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
        ])

        final_output_task = LoadInternalReportingOrderTransactionsToWarehouse(import_date=self.UPPER_BOUND_DATE)
        columns = [x[0] for x in final_output_task.columns]

        with self.vertica.cursor() as cursor:
            expected_output_csv = os.path.join(self.data_dir, 'output', 'expected_financial_report.csv')
            expected = pandas.read_csv(expected_output_csv, parse_dates=True)

            cursor.execute("SELECT {columns} FROM {schema}.f_orderitem_transactions".format(
                columns=','.join(columns),
                schema=self.vertica.schema_name
            ))
            response = cursor.fetchall()
            f_orderitem_transactions = pandas.DataFrame(response, columns=columns)

            try:
                # A ValueError is raised if the column labels don't match or the
                # two data frames have different shapes.
                self.assertTrue(all(f_orderitem_transactions == expected))
            except ValueError:
                buf = StringIO()
                f_orderitem_transactions.to_csv(buf)
                print 'Actual:'
                print buf.getvalue()
                # Use a fresh buffer: seeking back to 0 and rewriting would leave
                # stale bytes from the first dump in getvalue() whenever the
                # expected frame serializes shorter than the actual one.
                buf = StringIO()
                expected.to_csv(buf)
                print 'Expected:'
                print buf.getvalue()
                self.fail("Expected and returned data frames have different shapes or labels.")
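A note on the assertion above (a pandas detail, not something the example states): iterating a DataFrame yields its column labels, so all(frame == expected) is satisfied whenever the element-wise comparison succeeds and the labels are truthy; the cell values themselves are never checked, which is why the later examples compare values explicitly. A minimal sketch of that behaviour:

    import pandas

    left = pandas.DataFrame({'a': [1, 2]})
    right = pandas.DataFrame({'a': [9, 9]})

    # Iterating a DataFrame yields its column labels, so all(left == right)
    # is True even though every value differs; only shape or label mismatches
    # (which raise ValueError) are actually detected by Example #1.
    print(all(left == right))  # prints True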
Example #2
    def test_end_to_end_without_vertica(self):
        # Similar to test_end_to_end, but it skips the Vertica stage and checks
        # data values, not just data shape.
        table_name = 'reconciled_order_transactions'
        output_root = url_path_join(
            self.warehouse_path, table_name, 'dt=' + self.UPPER_BOUND_DATE
        ) + '/'
        self.task.launch([
            'ReconcileOrdersAndTransactionsTask',
            '--import-date', self.UPPER_BOUND_DATE,
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
            '--output-root', output_root,
        ])
        final_output_task = LoadInternalReportingOrderTransactionsToWarehouse(
            import_date=luigi.DateParameter().parse(self.UPPER_BOUND_DATE)
        )
        columns = [x[0] for x in final_output_task.columns]

        expected_output_csv = os.path.join(self.data_dir, 'output', 'expected_financial_report.csv')
        expected = pandas.read_csv(expected_output_csv, parse_dates=True)

        raw_output = self.read_dfs_directory(output_root)
        # The reducer output is tab-separated with NULLs written as \N; strip the
        # marker so pandas parses those cells as empty.
        output = StringIO(raw_output.replace('\t\\N', '\t'))
        data = pandas.read_table(output, header=None, names=columns, parse_dates=True)
        # Re-order dataframe for consistent comparison:
        for frame in (data, expected):
            frame.sort(['payment_ref_id', 'transaction_type'], inplace=True, ascending=[True, False])
            frame.reset_index(drop=True, inplace=True)

        self.assert_data_frames_equal(data, expected)
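assert_data_frames_equal, used in Example #2 and Example #3, is a helper on the test case that is not shown here. A minimal sketch of what such a helper might look like (an assumption, not the project's actual implementation): compare labels, shape, and values, and dump both frames when they differ.

    def assert_data_frames_equal(self, data, expected):
        # Hypothetical helper: check column labels, shape, and cell values, and
        # print both frames on mismatch so failures are easier to debug.
        # (NaN-aware comparison is deliberately left out of this sketch.)
        try:
            self.assertTrue(list(data.columns) == list(expected.columns))
            self.assertTrue(data.shape == expected.shape)
            self.assertTrue((data.values == expected.values).all())
        except AssertionError:
            print 'Actual:'
            print data.to_csv()
            print 'Expected:'
            print expected.to_csv()
            raise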
Example #3
    def test_end_to_end(self):
        self.task.launch([
            'BuildFinancialReportsTask',
            '--import-date', self.UPPER_BOUND_DATE,
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
        ])

        final_output_task = LoadInternalReportingOrderTransactionsToWarehouse(
            import_date=luigi.DateParameter().parse(self.UPPER_BOUND_DATE)
        )
        columns = [x[0] for x in final_output_task.columns]

        with self.vertica.cursor() as cursor:
            expected_output_csv = os.path.join(self.data_dir, 'output', 'expected_financial_report.csv')

            expected_output_data = read_csv_fixture_as_list(expected_output_csv)

            expected = pandas.DataFrame(expected_output_data, columns=columns)

            cursor.execute("SELECT {columns} FROM {schema}.f_orderitem_transactions".format(
                columns=','.join(columns),
                schema=self.vertica.schema_name
            ))
            response = cursor.fetchall()

            f_orderitem_transactions = pandas.DataFrame(map(coerce_columns_to_string, response), columns=columns)

            for frame in (f_orderitem_transactions, expected):
                frame.sort(['payment_ref_id', 'transaction_type'], inplace=True, ascending=[True, False])
                frame.reset_index(drop=True, inplace=True)

            self.assert_data_frames_equal(f_orderitem_transactions, expected)
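read_csv_fixture_as_list and coerce_columns_to_string are likewise helpers that the example assumes but does not define. Roughly, the first loads the fixture as rows of strings and the second turns each Vertica row into strings so the two frames are comparable; hypothetical sketches (header handling and type edge cases omitted):

    import csv

    def read_csv_fixture_as_list(csv_path):
        # Hypothetical helper: read the expected-output fixture as a list of rows,
        # with every cell kept as a string.
        with open(csv_path) as csv_file:
            return [row for row in csv.reader(csv_file)]

    def coerce_columns_to_string(row):
        # Hypothetical helper: the Vertica driver returns typed values (Decimal,
        # datetime, and so on); casting each cell to a string lines the query
        # results up with the string cells read from the CSV fixture.
        return [str(value) if value is not None else None for value in row]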
Example #4
    def requires(self):
        yield (
            TransactionReportTask(
                import_date=self.import_date,
                n_reduce_tasks=self.n_reduce_tasks,
            ),
            LoadInternalReportingOrderTransactionsToWarehouse(
                import_date=self.import_date,
                n_reduce_tasks=self.n_reduce_tasks,
                schema=self.schema,
                credentials=self.credentials,
                overwrite=self.overwrite,
                is_empty_transaction_allowed=self.is_empty_transaction_allowed,
            ),
            LoadInternalReportingEdServicesReportToWarehouse(
                import_date=self.import_date,
                n_reduce_tasks=self.n_reduce_tasks,
                schema=self.schema,
                credentials=self.credentials,
                overwrite=self.overwrite,
            ),
            # The following task performs transaction reconciliation on a more complete order record.
            # Rather than hunt down all the places where the current table is being used, we instead
            # output a separate one.
            LoadInternalReportingFullOrderTransactionsToWarehouse(
                import_date=self.import_date,
                n_reduce_tasks=self.n_reduce_tasks,
                schema=self.schema,
                credentials=self.credentials,
                overwrite=self.overwrite,
                is_empty_transaction_allowed=self.is_empty_transaction_allowed,
            ),
            # The following tasks output the order tables that were used in the above reconciliation.
            LoadInternalReportingFullShoppingcartOrdersToWarehouse(
                import_date=self.import_date,
                n_reduce_tasks=self.n_reduce_tasks,
                schema=self.schema,
                credentials=self.credentials,
                overwrite=self.overwrite,
            ),
            LoadInternalReportingFullOttoOrdersToWarehouse(
                import_date=self.import_date,
                n_reduce_tasks=self.n_reduce_tasks,
                schema=self.schema,
                credentials=self.credentials,
                overwrite=self.overwrite,
            ),
        )

    def requires(self):
        yield (
            TransactionReportTask(
                import_date=self.import_date,
                n_reduce_tasks=self.n_reduce_tasks,
            ),
            LoadInternalReportingOrderTransactionsToWarehouse(
                import_date=self.import_date,
                n_reduce_tasks=self.n_reduce_tasks,
                schema=self.schema,
                credentials=self.credentials,
                overwrite=self.overwrite,
            ),
            LoadInternalReportingEdServicesReportToWarehouse(
                import_date=self.import_date,
                n_reduce_tasks=self.n_reduce_tasks,
                schema=self.schema,
                credentials=self.credentials,
                overwrite=self.overwrite,
            ),
        )
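Both versions of requires() lean on Luigi flattening whatever the method yields, so yielding a single tuple declares every task in it as a dependency of the wrapper. A minimal, self-contained sketch of the same pattern, with made-up task names:

    import luigi

    class StepA(luigi.Task):
        # Toy leaf task, only here to illustrate the dependency pattern.
        def output(self):
            return luigi.LocalTarget('step_a.txt')

        def run(self):
            with self.output().open('w') as output_file:
                output_file.write('a')

    class StepB(StepA):
        def output(self):
            return luigi.LocalTarget('step_b.txt')

    class BuildEverything(luigi.WrapperTask):
        # Yielding one tuple from requires() schedules every task in it; the
        # wrapper itself completes once all of its requirements have run.
        def requires(self):
            yield (
                StepA(),
                StepB(),
            )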