def confirm_parents_nonmissing( self, summary_table ):
    # (SAS 209-220)
    query = _MISSING_PARENTS_QUERY.format(
            target_summary_table=summary_table,
            parent_group_cols=Joiner( self.parent_group_cols ),
            target_group_cols=Joiner( self.target_group_cols ) )
    ans = self.db_context.execute( query )
    n = ans[0][0]
    if n > 0:
        raise ValueError( "Some target groups have missing parent group values" )
def confirm_groups_consistent( self, summary_table ):
    # (SAS 198-207)
    query = _CONFLICTING_GROUPS_QUERY.format(
            target_summary_table=summary_table,
            all_group_cols=Joiner( self.parent_group_cols, self.extra_target_group_cols ),
            target_group_cols=Joiner( self.target_group_cols ) )
    ans = self.db_context.execute( query )
    n = ans[0][0]
    if n > 1:
        raise ValueError( "Some target groups contain more than one parent group" )
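# For orientation, minimal sketches of the two validation templates these checks
# format. The real _MISSING_PARENTS_QUERY / _CONFLICTING_GROUPS_QUERY strings are
# defined elsewhere in this module; the SQL below is an assumption about their
# shape, not the actual definitions.
_MISSING_PARENTS_QUERY_SKETCH = """
    SELECT COUNT(1)
    FROM {target_summary_table}
    WHERE {parent_group_cols:delimiter=' IS NULL OR '} IS NULL
"""
_CONFLICTING_GROUPS_QUERY_SKETCH = """
    SELECT MAX( n_parent_groups )
    FROM ( SELECT COUNT(*) AS n_parent_groups
           FROM ( SELECT DISTINCT {all_group_cols}, {target_group_cols}
                  FROM {target_summary_table} ) AS A
           GROUP BY {target_group_cols} ) AS B
"""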
def create_output( self, parent_summary, target_summary ):
    self.ds_out.add_all( self.parent_group_cols )
    self.ds_out.add_all( self.extra_target_group_cols )
    with get_temp_table( self.db_context ) as ds_temp:
        ds_temp.add_all( self.ds_out )
        # Working columns for the t statistic computation
        ds_temp.add( FieldSpec( field_name="numerator", basic_type="FLOAT" ) )
        ds_temp.add( FieldSpec( field_name="ttest_se", basic_type="FLOAT" ) )
        ds_temp.add( FieldSpec( field_name="ttest_value", basic_type="FLOAT" ) )
        ds_temp.add( FieldSpec( field_name="p_css", basic_type="FLOAT" ) )
        ds_temp.add( FieldSpec( field_name="t_css", basic_type="FLOAT" ) )
        ds_temp.add( FieldSpec( field_name="p_t", basic_type="FLOAT" ) )
        ds_temp.add( FieldSpec( field_name="t_t", basic_type="FLOAT" ) )
        ds_temp.add( FieldSpec( field_name="p_n", basic_type="FLOAT" ) )
        ds_temp.add( FieldSpec( field_name="t_n", basic_type="FLOAT" ) )
        self.db_context.executeNoResults( ds_temp.definition )

        query = _TTEST_QUERY_1.format( ds_out=ds_temp,
                group_cols=Joiner( self.parent_group_cols, self.extra_target_group_cols ),
                parent_summary=parent_summary,
                target_summary=target_summary,
                parent_group_cols=Joiner( self.parent_group_cols ),
                target_group_cols=Joiner( self.target_group_cols ) )
        self.db_context.executeNoResults( query )
        query = _TTEST_QUERY_2.format( ds_out=ds_temp )
        self.db_context.executeNoResults( query )
        query = _TTEST_QUERY_3.format( ds_out=ds_temp )
        self.db_context.executeNoResults( query )
        query = _TTEST_QUERY_4.format( ds_out=ds_temp )
        self.db_context.executeNoResults( query )
        query = _TTEST_QUERY_5.format( ds_out=ds_temp, critval=self.critical_value )
        self.db_context.executeNoResults( query )
        query = _TTEST_QUERY_6.format( ds_out=ds_temp, critval=self.critical_value,
                output_col=self.output_col )
        self.db_context.executeNoResults( query )
        query = _TTEST_QUERY_7.format( ds_out=ds_temp, rrv=self.reciprocal_round_value,
                output_col=self.output_col )
        self.db_context.executeNoResults( query )

        self.db_context.executeNoResults( self.ds_out.definition )
        query = _FINAL_OUTPUT_QUERY.format( ds_out=self.ds_out, ds_temp=ds_temp,
                columns=Joiner( self.ds_out ) )
        self.db_context.executeNoResults( query )
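# The seven _TTEST_QUERY_* templates appear to build a pooled two-sample t
# statistic column by column. A plain-Python sketch of the arithmetic implied by
# the column names (p_*=parent, t_*=target; css=corrected sum of squares,
# t=total, n=count) -- an inference from the names, not a transcription of the
# actual queries:
def _ttest_sketch( p_css, t_css, p_t, t_t, p_n, t_n ):
    # Difference between the target mean and the parent mean
    numerator = float( t_t ) / t_n - float( p_t ) / p_n
    # Pooled variance, then the standard error of the difference in means
    pooled_var = ( p_css + t_css ) / float( p_n + t_n - 2 )
    ttest_se = ( pooled_var * ( 1.0 / p_n + 1.0 / t_n ) ) ** 0.5
    return numerator / ttest_se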
def testJoinerItemFormat(self):
    cols = Joiner( ['column_1', 'column_2'] )
    out = """SELECT {cols:delimiter=',', item='col', itemfmt='{alias}.{{col}}'} FROM {table} AS {alias}""".format(
            cols=cols, table="table_1", alias="A" )
    self.assertEqual( 'SELECT A.column_1,A.column_2 FROM table_1 AS A', out,
                      'Wrong output \'{}\''.format( out ) )
def outputDistrictFiles(q, i, lock, outputDir, runContextName, districtTable,
                        fields, starts, names, lengths, types, var_names, outformats):
    runContext = RunContext(runContextName)
    dbContext = runContext.getDBContext()
    while True:
        lock.acquire()
        if q.empty():
            lock.release()
            print "Thread %s is OVER " % i + str(datetime.datetime.now())
            return
        else:
            district = q.get()
            lock.release()
            fh = open(outputDir + district + ".txt", "w")
            fhc = open(outputDir + district + ".csv", "w")
            rows = dbContext.execute(
                "SELECT {cols} FROM {districtTable} WHERE dcrxid='{district}' "
                "ORDER BY dcrxid, grade, ucrxlnm, ucrxfnm, lithocode, studentmid"
                .format(cols=Joiner(var_names), district=district, districtTable=districtTable))
            # Write the CSV header, skipping consecutive duplicate column names
            lastname = ''
            for n in names:
                if n != lastname:
                    fhc.write(n.upper() + ',')
                lastname = n
            fhc.write('\n')
            writeToFile(rows, fh, fhc, 0, fields, starts, names, lengths, types,
                        var_names, outformats)
            fh.close()
            fhc.close()
def _process_required_key_dups_and_missing(self, db_context, input_table, side, allow_dups):
    # Define duplicates table
    dup_table = get_temp_table(db_context)
    dup_table.create_foreign_key(input_table, True, 'fk_{}_'.format(side))
    dup_table.add_all(self.required_merge_keys)
    dup_table.add(FieldSpec(basic_type="TINYINT", field_name="has_dups"))
    dup_table.add(FieldSpec(basic_type="TINYINT", field_name="has_missing"))
    dup_table.add(FieldSpec(basic_type="TINYINT", field_name="has_dups_both"))
    db_context.executeNoResults(dup_table.definition)

    # Populate table
    query = _FIND_DUPLICATES_QUERY.format(
        dup_table=dup_table,
        input_table=input_table,
        foreign_key=Joiner(dup_table.foreign_keys[0]),
        required_key=Joiner(self.required_merge_keys),
        side=side)
    db_context.executeNoResults(query)

    # Define rejects table
    reject_table = get_temp_table(db_context)
    reject_table.create_foreign_key(input_table, True, 'fk_{}_'.format(side))
    reject_table.add_all(self.required_merge_keys)
    reject_table.add(FieldSpec(basic_type="NVARCHAR", data_length=4000,
                               field_name="merge_report"))
    db_context.executeNoResults(reject_table.definition)

    # Move missing keys to rejects table
    self._move_rejects(dup_table, reject_table, 0, "has_missing > 0",
                       "'Missing required key on {}'".format(side))

    # If required, move duplicates to rejects table
    if not allow_dups:
        self._move_rejects(dup_table, reject_table, 0, "has_dups > 0",
                           "'Duplicate required key on {}'".format(side))
    return dup_table, reject_table
def _copy_rejects(self, from_table, reject_table, key_index, condition, reason_expression):
    query = _ADD_REJECTS_QUERY.format(
        from_table=from_table,
        reject_table=reject_table,
        keys=Joiner(from_table.foreign_keys[key_index], self.required_merge_keys),
        reason_expression=reason_expression,
        condition=condition)
    self.db_context.executeNoResults(query)
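# A minimal sketch of the shape _ADD_REJECTS_QUERY presumably has (the actual
# template lives elsewhere in this module; this is an assumption for orientation):
_ADD_REJECTS_QUERY_SKETCH = """
    INSERT INTO {reject_table} ( {keys}, merge_report )
    SELECT {keys}, {reason_expression}
    FROM {from_table}
    WHERE {condition}
"""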
def input_function( params, table ):
    # Pick all targets that have more than one person
    with get_temp_table( self.db_context ) as temp_table_1, \
         get_temp_table( self.db_context ) as temp_table_2:
        query = """
        SELECT DISTINCT {parentgroups}, {targetgroups}
        INTO {temp_table_1:qualified}
        FROM {table:qualified}
        WHERE ({where}) AND ({wheret})
            AND {parentgroups} IS NOT NULL
            AND {targetgroups} IS NOT NULL
        GROUP BY {parentgroups}, {targetgroups}
        HAVING COUNT(1) > 1
        """.format( table=table, temp_table_1=temp_table_1, **params )
        self.db_context.executeNoResults(query)

        # From those, pick the first target in each parent
        query = """
        SELECT {parentgroups}, {targetgroups},
               ROW_NUMBER() OVER( PARTITION BY {parentgroups} ORDER BY {targetgroups} ) AS r1
        INTO {temp_table_2:qualified}
        FROM {temp_table_1:qualified}
        """.format( temp_table_1=temp_table_1, temp_table_2=temp_table_2, **params )
        self.db_context.executeNoResults(query)

        # For each selected target, pick the first two observations
        in_ds = get_temp_table( self.db_context )
        query = """
        SELECT {columns:itemfmt='C.{{}}'}
        INTO {in_ds}
        FROM (
            SELECT {columns:itemfmt='A.{{}}'},
                   ROW_NUMBER() OVER( PARTITION BY A.{parentgroups}, A.{targetgroups}
                                      ORDER BY A.{targetgroups} ) AS r2
            FROM (
                SELECT {columns}
                FROM {table:qualified}
                WHERE ({where}) AND ({wheret})
                    AND {parentgroups} IS NOT NULL
                    AND {targetgroups} IS NOT NULL
            ) AS A
            INNER JOIN (
                SELECT {parentgroups}, {targetgroups}
                FROM {temp_table_2:qualified}
                WHERE r1=1
            ) AS B
                ON A.{parentgroups}=B.{parentgroups}
                AND A.{targetgroups}=B.{targetgroups}
        ) AS C
        WHERE C.r2<=2
        """.format( in_ds=in_ds, temp_table_2=temp_table_2, table=table,
                    columns=Joiner( table ), **params )
        self.db_context.executeNoResults(query)
        in_ds.populate_from_connection()
        return in_ds
def generate_summary_stats( self, where_expression, *args ):
    # (SAS 188-192; 228-233)
    group_cols = Joiner( *args )
    summary_table = get_temp_table( self.db_context )
    query = _SUMMARY_QUERY.format( input_col=self.input_col,
            summary_table=summary_table,
            ds_in=self.ds_in,
            group_cols=group_cols,
            where=where_expression )
    self.db_context.executeNoResults( query )
    summary_table.populate_from_connection()
    return summary_table
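# _SUMMARY_QUERY is defined elsewhere; a sketch of its likely shape, assuming it
# gathers the per-group count, total, and corrected sum of squares that
# create_output() consumes (an assumption, not the actual template):
_SUMMARY_QUERY_SKETCH = """
    SELECT {group_cols},
           COUNT( {input_col} ) AS n,
           SUM( {input_col} ) AS t,
           SUM( {input_col} * {input_col} )
               - ( SUM( {input_col} ) * SUM( {input_col} ) ) / COUNT( {input_col} ) AS css
    INTO {summary_table}
    FROM {ds_in}
    WHERE {where}
    GROUP BY {group_cols}
"""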
def execute( self ):
    self.run_context.debug( "Running ttests" )
    with get_temp_table( self.db_context ) as ttest_input_table, \
         get_temp_table( self.db_context ) as ttest_output_table:
        ttest_level = TTestLevel( ds_in=ttest_input_table, ds_out=ttest_output_table,
                                  db_context=self.db_context, round_value=1 )
        for target_level in self.target_levels:
            drop_table_if_exists( target_level.output_table )
            self.db_context.executeNoResults( target_level.output_table.definition )
            where_t = target_level.get_where_expression()
            self.db_context.executeNoResults( _FILL_OUTPUT_TABLE_QUERY.format(
                    output_table=target_level.output_table,
                    input_table=self.ds_in,
                    target_id=target_level.output_table[ target_level.id ],
                    where_t=where_t ) )
            vars_target = set( target_level.where )
            vars_target.add( target_level.id )
            for parent_level in target_level.contents:
                where_p = parent_level.get_where_expression()
                vars_parent = set( parent_level.where )
                vars_parent.add( parent_level.id )
                vars_parent.update( vars_target )
                for row in parent_level.contents:
                    vars_row = set( vars_parent )
                    vars_row.add( row.inputvar )
                    self.run_context.debug(
                        "Running ttest for variable {} comparing {} to {}".format(
                            row.inputvar, target_level.level, parent_level.level ) )
                    drop_table_if_exists( ttest_input_table )
                    query = _GET_INPUT_VARIABLES_QUERY.format( ds_in=self.ds_in,
                            ttest_input_table=ttest_input_table,
                            vars=Joiner( vars_row ),
                            where_t=where_t,
                            where_p=where_p )
                    self.db_context.executeNoResults( query )
                    ttest_level.input_col_name = row.inputvar
                    ttest_level.output_col_name = row.outputvar
                    ttest_level.critical_value = row.critval
                    ttest_level.target_group_cols = [ target_level.id ]
                    ttest_level.parent_group_cols = [ parent_level.id ]
                    ttest_level.target_where_expression = where_t
                    ttest_level.parent_where_expression = where_p
                    ttest_level.execute()
                    query = _ACCUMULATE_RESULTS_QUERY.format(
                            output_table=target_level.output_table,
                            ttest_table=ttest_output_table,
                            output_var=row.outputvar,
                            target_id=target_level.id )
                    self.db_context.executeNoResults( query )
def outputStateFile(i, outputDir, runContextName, stateTable, fields, starts,
                    names, lengths, types, var_names, outformats):
    runContext = RunContext(runContextName)
    dbContext = runContext.getDBContext()
    num_rows = dbContext.execute(
        "SELECT count(*) FROM {stateTable}".format(stateTable=stateTable))
    length = num_rows[0][0] / NUM_PROCESSES
    print 'starting to make wholestate' + str(datetime.datetime.now())
    fh = open(outputDir + "WholeState%s.txt" % i, "w")
    rows = dbContext.execute(
        "SELECT {cols} FROM {stateTable} "
        "ORDER BY dcrxid, grade, ucrxlnm, ucrxfnm, lithocode, studentmid"
        .format(cols=Joiner(var_names), stateTable=stateTable))
    #writeToFile(rows, fh, None, 1, fields, starts, names, lengths, types, var_names, outformats)
    # The last process picks up the remainder rows left over by integer division
    if i == NUM_PROCESSES - 1:
        writeToFile(rows[(length * i):], fh, None, 1, fields, starts, names,
                    lengths, types, var_names, outformats)
    else:
        writeToFile(rows[(length * i):(length * i + length)], fh, None, 1, fields,
                    starts, names, lengths, types, var_names, outformats)
    fh.close()
def assemble_input_data_n_in_group( self, n_in_group, params, table ):
    in_ds = get_temp_table( self.db_context )
    query = """
    SELECT {columns:itemfmt='A.{{}}'}
    INTO {in_ds}
    FROM (
        SELECT {columns},
               DENSE_RANK() OVER( PARTITION BY {parentgroups}
                                  ORDER BY {targetgroups} ) AS r1,
               ROW_NUMBER() OVER( PARTITION BY {parentgroups}, {targetgroups}
                                  ORDER BY {targetgroups} ) AS r2
        FROM {table_name:qualified}
        WHERE ({where}) AND ({wheret})
            AND {parentgroups} IS NOT NULL
            AND {targetgroups} IS NOT NULL
    ) AS A
    WHERE A.r1<={n_in_group} AND A.r2=1
    """.format( table_name=table, columns=Joiner( table ), n_in_group=n_in_group,
                in_ds=in_ds, **params )
    self.db_context.executeNoResults(query)
    in_ds.populate_from_connection()
    return in_ds
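# Hypothetical `params` for the sampler above, to make the expected keys concrete
# (column names and filters are illustrative, not taken from a real run). r1/r2
# keep the first observation from each of the first n_in_group target groups per
# parent group.
example_params = dict(
    parentgroups='dcrxid',    # parent grouping column
    targetgroups='bcrxid',    # target grouping column
    where='1=1',              # parent-side filter
    wheret='grade=10',        # target-side filter
)
# in_ds = self.assemble_input_data_n_in_group( 2, example_params, table )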
def runRC(version, tideLayoutFile, tideSpecFile, tideDataDirectory, tideDataTestCopy,
          tideDataOutputFile, masterListDTN, dbContext, runContextName):
    testImport = 1
    testMerge = 1
    testOutput = 1
    rcOutputDTN = 'rc%sFinal' % str(version)
    runContext = RunContext(runContextName)
    if testImport:
        drop_tables(['TideLayout', 'TideFile', 'TideFile0', 'TideFile1', 'TideFile2',
                     'TideFile3', 'TideFile4', 'TideFile5'], dbContext)
        reader = SafeExcelReader(runContext)
        reader.db_context = dbContext
        reader.filename = tideLayoutFile
        reader.sheetName = "OH Ach Pre-Reporting"
        reader.outputTable = "TideLayout"
        reader.createTable()
        files = []
        os.chdir(tideDataDirectory)
        for tideDataFile in glob.glob("*.txt"):
            files.append(tideDataFile)
        q = Queue()
        [q.put(x) for x in files]
        ps = [Process(target=readTideFile, args=(q, i, tideDataDirectory, runContextName))
              for i, x in enumerate(files)]
        [p.start() for p in ps]
        [p.join() for p in ps]
        # We created a table for every text file. Now, let's merge them together
        i = len(ps)
        query = "SELECT * INTO TideFile FROM ("
        query2 = ""
        while i > 0:
            i -= 1
            query += "SELECT * FROM TideFile" + str(i) + " "
            if i > 0:
                query += " UNION ALL "
            else:
                query += ") as tmp;\n"
            query2 += " DROP TABLE TideFile%s; " % (i,)
        query2 += " ALTER TABLE tidefile ALTER COLUMN bcrxnm_attend NVARCHAR(35); "
        dbContext.executeNoResults(query)
        dbContext.executeNoResults(query2)
        query = """
        UPDATE TideFile SET ucrxgen=1 WHERE ucrxgen='F';
        UPDATE TideFile SET ucrxgen=2 WHERE ucrxgen='M';
        UPDATE TideFile SET migrant='' WHERE ucrxgen='0';
        UPDATE TideFile SET dcrxid_home='', bcrxid_home='', dcrxnm_home='', bcrxnm_home=''
            WHERE dcrxid_home=dcrxid_attend;
        UPDATE TideFile SET bcrxid_home='', bcrxnm_home='' WHERE bcrxid_attend=bcrxid_home;
        UPDATE TideFile SET dcrxid_home='', bcrxid_home='', dcrxnm_home='', bcrxnm_home=''
            WHERE dcrxid_home=bcrxid_attend;
        ALTER TABLE TideFile DROP COLUMN filler0, filler1, filler2, filler3, filler4,
            filler5, filler6, filler7, drops1, drops2, drops3, preid_num1, preid_num2;
        UPDATE TideFile SET grade=NULL WHERE grade='.'
        UPDATE TideFile SET dob_day=NULL WHERE dob_day='.'
        UPDATE TideFile SET dob_month=NULL WHERE dob_month='.'
        UPDATE TideFile SET dob_year=NULL WHERE dob_year='.'
        """
        dbContext.executeNoResults(query)
        if version == 1:
            dbContext.executeNoResults(
                " ALTER TABLE TideFile ALTER COLUMN ucrx_preid NVARCHAR(128) NOT NULL; ")
            dbContext.executeNoResults(
                " ALTER TABLE TideFile ADD PRIMARY KEY (ucrx_preid); ")
        else:
            dbContext.executeNoResults(
                " ALTER TABLE TideFile ALTER COLUMN lithocode NVARCHAR(128) NOT NULL; ")
            dbContext.executeNoResults(
                " ALTER TABLE TideFile ADD PRIMARY KEY (lithocode); ")
    if testMerge:
        tables = ['ds1_rm1_RC1', 'ds2_rm2_RC1', 'fuzzy_RC1', 'merge1_RC1', 'OGT_ScoreFile']
        drop_tables(tables, dbContext)
        query = """
        SELECT * INTO OGT_ScoreFile FROM intakeFinal;
        UPDATE OGT_ScoreFile SET dcrxid_home='', bcrxid_home='', dcrxnm_home='',
            bcrxnm_home='', distrtype_home='', schtype_home=''
            WHERE dcrxid_home=dcrxid_attend;
        UPDATE OGT_ScoreFile SET bcrxid_home='', bcrxnm_home='', schtype_home=''
            WHERE bcrxid_attend=bcrxid_home;
        UPDATE OGT_ScoreFile SET dcrxid_home='', bcrxid_home='', dcrxnm_home='',
            bcrxnm_home='', distrtype_home='', schtype_home=''
            WHERE dcrxid_home=bcrxid_attend;
        UPDATE OGT_ScoreFile SET dcrxnm_home='', distrtype_home='' WHERE dcrxid_home='';
        ALTER TABLE OGT_ScoreFile ALTER COLUMN id INT NOT NULL;
        ALTER TABLE OGT_ScoreFile ADD PRIMARY KEY (id);
        """
        dbContext.executeNoResults(query)

        ######## MERGE ########
        merge_def = MergeDef(dbContext)
        tideFile = dbContext.getTableSpec("TideFile")
        scoreFile = dbContext.getTableSpec("OGT_ScoreFile")
        merge_def.table_name = 'merge1_RC1'
        merge_def.left_input_table = scoreFile
        merge_def.right_input_table = tideFile
        merge_def.allow_dups_both = (False,)
        merge_def.join_type = JOIN_TYPE_INNER
        merge_def.fuzzy_report_table = 'fuzzy_RC1'
        merge_def.left_remain_table = 'ds1_rm1_RC1'
        merge_def.right_remain_table = 'ds2_rm2_RC1'
        read_spec_file(tideSpecFile, merge_def)
        merge_def.execute()

        tables = ['dcrxid_home', 'bcrxid_all', 'dcrxid_all', 'blanklist', 'dcrxid_missing',
                  rcOutputDTN, 'countyNameFmt', 'OGT_RC1_3', 'OGT_RC1_2', 'OGT_RC1_1',
                  'OGT_P2', 'OGT_P1', 'merge_master1', 'dedup_d2', 'dedup_b', 'dedup_d',
                  'finalResult_RC1', 'OGT_Tide']
        drop_tables(tables, dbContext)
        query = """
        ALTER TABLE merge1_RC1 DROP COLUMN primary1, fk_right_1, fk_left_1;
        ALTER TABLE ds1_rm1_RC1 DROP COLUMN merge_report, fk_left_1;
        """
        dbContext.executeNoResults(query)
        query = """
        SELECT * INTO finalResult_RC1 FROM (
            SELECT * FROM merge1_RC1
            UNION ALL
            SELECT * FROM ds1_rm1_RC1 ) AS tmp;
        UPDATE finalResult_RC1 SET dcrxid_home='052555' WHERE dcrxid_home='053165';
        UPDATE finalResult_RC1 SET dcrxid_home='052555' WHERE dcrxid_home='053645';
        UPDATE finalResult_RC1 SET dcrxid_home='052514' WHERE dcrxid_home='052647';
        UPDATE finalResult_RC1 SET dcrxid_home='052548' WHERE dcrxid_home='053652';
        UPDATE finalResult_RC1 SET dcrxid_home='052530' WHERE dcrxid_home='053231';
        UPDATE finalResult_RC1 SET dcrxid_home='000129' WHERE dcrxid_home='092247';
        UPDATE finalResult_RC1 SET dcrxid_home='000129' WHERE dcrxid_home='064915';
        UPDATE finalResult_RC1 SET dcrxid_home='052514' WHERE dcrxid_home='053454';
        UPDATE finalResult_RC1 SET dcrxid_home='052563' WHERE dcrxid_home='053637';
        UPDATE finalResult_RC1 SET dcrxid_home='052514' WHERE dcrxid_home='052878';
        UPDATE finalResult_RC1 SET dcrxid_home='000129' WHERE dcrxid_home='090456';
        UPDATE finalResult_RC1 SET dcrxid_home='' WHERE dcrxid_home='BBBBBB';
        SELECT * INTO dedup_d FROM {masterList}
        SELECT * INTO dedup_d2 FROM {masterList}
        SELECT * INTO dedup_b FROM {masterList}
        """.format(masterList=masterListDTN)
        dbContext.executeNoResults(query)
        if version == 2:
            query = """
            UPDATE finalResult_RC1 SET dcrxid_home='000129'
                WHERE dcrxid_home IN ('008071', '067629', '097923');
            UPDATE finalResult_RC1 SET dcrxid_attend='051284' WHERE dcrxid_attend='051292';
            UPDATE finalResult_RC1 SET dcrxid_attend='051060' WHERE dcrxid_attend='064998';
            """
            dbContext.executeNoResults(query)
        dbContext.executeNoResults(nodupkey("dedup_d", "dcrxid_c"))
        dbContext.executeNoResults(nodupkey("dedup_d2", "dcrxid_c"))
        dbContext.executeNoResults(nodupkey("dedup_b", "bcrxid_c"))
        query = """
        ALTER TABLE dedup_d DROP COLUMN bcrxid_c, schtype, source, bcrxnm
        EXEC SP_RENAME 'dedup_d.dcrxid_c', 'dcrxid_home', 'COLUMN'
        EXEC SP_RENAME 'dedup_d.distrtype', 'distrtype_home', 'COLUMN'
        EXEC SP_RENAME 'dedup_d.dcrxnm', 'dcrxnm_home', 'COLUMN'
        ALTER TABLE dedup_b DROP COLUMN dcrxid_c, distrtype, source, dcrxnm
        EXEC SP_RENAME 'dedup_b.bcrxid_c', 'bcrxid_home', 'COLUMN'
        EXEC SP_RENAME 'dedup_b.schtype', 'schtype_home', 'COLUMN'
        EXEC SP_RENAME 'dedup_b.bcrxnm', 'bcrxnm_home', 'COLUMN'
        ALTER TABLE finalResult_RC1 DROP COLUMN dcrxnm_home, distrtype_home;
        """
        dbContext.executeNoResults(query)
        query = """
        SELECT F.*, D.[dcxx_county] as [dcxx_county2], D.[dcrxnm_home], D.[distrtype_home]
        INTO merge_master1
        FROM finalResult_RC1 F LEFT JOIN dedup_d D ON (D.dcrxid_home = F.dcrxid_home);
        UPDATE merge_master1 SET dcxx_county=dcxx_county2
            WHERE dcxx_county='' OR dcxx_county IS NULL;
        ALTER TABLE merge_master1 DROP COLUMN dcxx_county2, bcrxnm_home, schtype_home;
        """
        dbContext.executeNoResults(query)
        query = """
        SELECT F.*, D.[dcxx_county] as [dcxx_county2], D.[bcrxnm_home], D.[schtype_home]
        INTO OGT_RC1_1
        FROM merge_master1 F LEFT JOIN dedup_b D ON (D.bcrxid_home = F.bcrxid_home)
        UPDATE OGT_RC1_1 SET dcxx_county=dcxx_county2
            WHERE dcxx_county='' OR dcxx_county IS NULL;
        ALTER TABLE OGT_RC1_1 DROP COLUMN dcxx_county2;
        """
        dbContext.executeNoResults(query)
        query = """
        SELECT * INTO OGT_P2 FROM OGT_RC1_1 WHERE bcrxid_attend='999999'
        SELECT * INTO OGT_P1 FROM OGT_RC1_1
            WHERE bcrxid_attend<>'999999' or bcrxid_attend is null;
        ALTER TABLE OGT_P1 DROP COLUMN bcrxnm_attend, dcrxnm_attend
        SELECT A.*, B.dcrxnm as dcrxnm_attend, B.bcrxnm as bcrxnm_attend
        INTO OGT_RC1_2
        FROM OGT_P1 A LEFT JOIN %s B
            ON (A.bcrxid_attend = B.bcrxid_c and A.dcrxid_attend = B.dcrxid_c)
        ALTER TABLE OGT_P2 DROP COLUMN dcrxnm_attend
        SELECT A.*, B.dcrxnm as dcrxnm_attend
        INTO OGT_RC1_3
        FROM OGT_P2 A LEFT JOIN dedup_d2 B ON (A.dcrxid_attend = B.dcrxid_c)
        """ % (masterListDTN)
        dbContext.executeNoResults(query)
        ts = get_table_spec("OGT_RC1_2", dbContext)
        ts.populate_from_connection()
        cols = [c.field_name for c in ts]
        query = """
        SELECT * INTO {finalTable} FROM (
            SELECT {column_names} FROM OGT_RC1_2
            UNION ALL
            SELECT {column_names} FROM OGT_RC1_3 ) as tmp;
        UPDATE {finalTable} SET dcxx_county=SUBSTRING(M.dcxx_county, 0, 30)
            FROM {finalTable} F LEFT JOIN {masterList} M ON (M.dcrxid_c = F.dcrxid_attend);
        """.format(finalTable=rcOutputDTN, masterList=masterListDTN,
                   column_names=Joiner(cols))
        dbContext.executeNoResults(query)
        if version == 1:
            query = """
            Select LTRIM(RTRIM(ucrx_preid)) as ucrx_preid INTO blanklist FROM {finalTable}
                where ucrx_preid not in (SELECT ucrx_preid from Tidefile)
                and ucrx_preid<>'' and ucrx_preid is not null;
            UPDATE {finalTable} SET ucrx_preid=''
                WHERE ucrx_preid IN (SELECT ucrx_preid FROM blanklist);
            SELECT * INTO OGT_Tide FROM {finalTable}
            UPDATE OGT_Tide SET ucrxgen='F' WHERE ucrxgen='1'
            UPDATE OGT_Tide SET ucrxgen='M' WHERE ucrxgen='2'
            UPDATE OGT_Tide SET studentmid='' WHERE studentmid='*'
            """.format(finalTable=rcOutputDTN)
            dbContext.executeNoResults(query)
            lengths = dbContext.execute("SELECT length FROM TideLayout")
            var_names = dbContext.execute(
                "SELECT variable_name FROM TideLayout WHERE variable_name NOT IN "
                "('filler0', 'filler1', 'filler2', 'filler3', 'filler4', 'filler5', "
                "'filler6', 'filler7', 'drops1', 'drops2', 'drops3', 'preid_num1', "
                "'preid_num2', 'ufxx_accel_test', 'upcx_DNS', 'upcx_INV', 'upmx_DNS', "
                "'upmx_INV', 'uprx_DNS', 'uprx_INV', 'uprx_TOG', 'upsx_DNS', 'upsx_INV', "
                "'upwx_DNS', 'upwx_INV' )")
            var_names_all = dbContext.execute("SELECT variable_name FROM TideLayout")
            lengths = [l[0] for l in lengths]
            var_names = [v[0] for v in var_names]
            var_names_all = [v[0] for v in var_names_all]
            # Some RC versions separate homeschool, some do not
            #rows = dbContext.execute( "SELECT {cols} FROM OGT_Tide ORDER BY CONVERT(binary(30), ssid), lithocode".format(cols=Joiner(var_names)) )
            rows = dbContext.execute(
                "SELECT {cols} FROM OGT_Tide WHERE schtype_attend <> 'H' "
                "ORDER BY CONVERT(binary(30), ssid), lithocode"
                .format(cols=Joiner(var_names)))
            file_s = datetime.datetime.now()
            fh = open(tideDataOutputFile, "w")
            for row in rows:
                for i, v in enumerate(var_names_all):
                    s = "{:<" + str(lengths[i]) + "}"
                    if v in var_names:
                        for j, v2 in enumerate(var_names):
                            if v == v2:
                                if v2 in ["grade", "dob_day", "dob_month", "dob_year"]:
                                    if row[j] is None:
                                        s = "{:>" + str(lengths[i]) + "}"
                                        fh.write(s.format("."))
                                    else:
                                        s = "{:0>" + str(lengths[i]) + "}"
                                        fh.write(s.format(int(row[j]))[0:lengths[i]])
                                else:
                                    fh.write(s.format(row[j] or "")[0:lengths[i]])
                                break
                    else:
                        fh.write(s.format(""))
                fh.write("\n")
            fh.close()
            print "time to write file: " + str(datetime.datetime.now() - file_s)
    if testOutput:
        f1 = open(tideDataOutputFile, 'r')
        f2 = open(tideDataTestCopy, 'r')
        lines1 = f1.readlines()
        lines2 = f2.readlines()
        for i, l1 in enumerate(lines1):
            if l1 != lines2[i]:
                print i
                print l1
                print lines2[i]
                raise Exception
        print "Complete Match!"
    print "******** FINISHED ********" + str(datetime.datetime.now())
def testJoinerMultipleSequences(self):
    cols = Joiner( [ 'column_1 VARCHAR(255)', 'column_2 VARCHAR(255)', 'column_3 VARCHAR(255)' ],
                   [],
                   [ 'PRIMARY KEY(column_1)' ] )
    out = "CREATE TABLE {table}({cols:delimiter=','})".format( cols=cols, table="table_1" )
    self.assertEqual(
        'CREATE TABLE table_1(column_1 VARCHAR(255),column_2 VARCHAR(255),column_3 VARCHAR(255),PRIMARY KEY(column_1))',
        out, 'Wrong output \'{}\''.format( out ) )
def test_21(self):
    answer_dir = os.path.join(self.answer_dir, 'test_21')
    if not os.path.exists(answer_dir):
        os.makedirs(answer_dir)
    specimen_dir = os.path.join(self.specimen_dir, 'ttest_test_21')
    result = True
    with self.read_g3() as g3, \
         self.read_oat_agg_sheet() as agg_sheet, \
         dbutilities.get_temp_table( self.db_context ) as tmp, \
         dbutilities.get_temp_table( self.db_context ) as tmp_agg:
        # As near as I can tell, the SAS test only runs for the tenth row of the agg sheet.
        # self.db_context.executeNoResults("DELETE FROM {agg_sheet} WHERE [import_order] != 10".format( agg_sheet=agg_sheet ))

        # We are just using this TTest instance to read the aggregation sheet. The actual
        # ttest will use another instance based on a truncated aggregation sheet.
        agg_sheet_reader = TTest(g3, self.db_context, agg_sheet, None, False)
        agg_sheet_reader.readAggData()
        assert dbutilities.table_exists(g3)
        targetParentRow = []
        for target_level in agg_sheet_reader.target_levels:
            for parent_level in target_level.contents:
                for row in parent_level.contents:
                    targetParentRow.append((target_level, parent_level, row))
        targetParentRow.sort(key=lambda row: row[2].import_order)
        for target_level, parent_level, row in targetParentRow:
            where_t = target_level.get_where_expression()
            target_id = target_level.id
            where_p = parent_level.get_where_expression()
            parent_id = parent_level.id
            i = row.import_order

            # Reduce the data to the desired sample
            dbutilities.drop_table_if_exists(tmp)
            query = """
            SELECT {vars},
                   COUNT( {input_var} ) OVER( PARTITION BY {parent_id}, {target_id} ) AS n_target,
                   0 AS n_parent
            INTO {tmp}
            FROM {g3}
            WHERE {where_t}
            """.format(parent_id=parent_id, target_id=target_id, input_var=row.inputvar,
                       where_t=where_t, tmp=tmp, g3=g3, vars=Joiner(g3))
            self.db_context.executeNoResults(query)
            query = """
            UPDATE {tmp} SET n_parent = A.B
            FROM ( SELECT n_parent,
                          COUNT( {input_var} ) OVER( PARTITION BY {parent_id} ) AS B
                   FROM {tmp}
                   WHERE {where_p} ) AS A
            """.format(parent_id=parent_id, input_var=row.inputvar, where_p=where_p, tmp=tmp)
            print query
            self.db_context.executeNoResults(query)
            query = "DELETE FROM {tmp} WHERE ( n_parent != 2 ) OR ( n_target != 1 )".format(tmp=tmp)
            self.db_context.executeNoResults(query)
            n_obs = dbutilities.n_obs(tmp)
            if n_obs > 0:
                # Reduce the aggregation sheet to the current row
                query = "SELECT * INTO {tmp_agg} FROM {agg_sheet} WHERE [import_order]={i}".format(
                    tmp_agg=tmp_agg, agg_sheet=agg_sheet, i=i)
                self.db_context.executeNoResults(query)

                # Do the ttest
                ttester = TTest(tmp, self.db_context, tmp_agg, None, False)
                ttester.readAggData()
                ttester.execute()

                # Check the answer
                answer_file = os.path.join(answer_dir, 'row_{0}_comparison.log'.format(i))
                specimen_file = os.path.join(specimen_dir, 'test_21_ttest_{0}.xls'.format(i))
                result_i = self.compare_output(specimen_file, target_level, answer_file)
                result = result and result_i
                print "{1} ttest test_21 for {0}".format(i, 'PASSED' if result_i else 'FAILED')
    self.assertTrue(result, "TTest Test 21 FAILED")
    return
                                   db_context=dbContext)
    valid_cols += get_column_names(
        "{studentTable}2".format(studentTable=studentTable), db_context=dbContext)
    valid_cols = [i.replace('[', '') for i in valid_cols]
    valid_cols = [i.replace(']', '').lower() for i in valid_cols]
    cols = dbContext.execute(
        "SELECT variable FROM ogt_dl_vars WHERE LOWER([value type])='variable' "
        "--variable IS NOT NULL AND variable<>'''''' AND variable<>'' AND ")
    cols = [c[0].lower() for c in cols]
    valid_cols = [c for c in valid_cols if c in cols]
    valid_cols += ['ufxx_sample', 'A.lithocode']
    query = ("SELECT {cols} INTO OGT_DL0 FROM {studentTable}0 A "
             "JOIN {studentTable}1 B ON (A.lithocode = B.lithocode) "
             "JOIN {studentTable}2 C ON (A.lithocode = C.lithocode)").format(
                 cols=Joiner(valid_cols), studentTable=studentTable)
    dbContext.executeNoResults(query)
    drop_tables(['OGT_DL1', 'OGT_DL2', 'OGT_DL3', 'OGT_DL4', 'OGT_DL_tmp0',
                 'OGT_DL_tmp1'], dbContext)
    query = """
    UPDATE OGT_DL0 SET ucsx_classname=REPLACE(ucsx_classname,'[','(');
    UPDATE OGT_DL0 SET ucrxlnm=REPLACE(ucrxlnm,'`','''');
    UPDATE OGT_DL0 SET ucrxfnm=REPLACE(ucrxfnm,'`','''');
    UPDATE OGT_DL0 SET ucxx_room_number=REPLACE(ucxx_room_number,'?','');
    UPDATE OGT_DL0 SET ucmx_coursecode=REPLACE(ucmx_coursecode,'`','');
    DELETE FROM OGT_DL0 WHERE bcrxid_attend='000001';
    UPDATE OGT_DL0 SET ssid='' WHERE ssid NOT LIKE '[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]_'
    UPDATE OGT_DL0 SET dcrxid_home=' ' WHERE dcrxid_home=' .';
def readAggData(self):
    # Validate the input file
    self.ds_in = get_table_spec( self.ds_in, self.db_context )
    self.ds_in.populate_from_connection()
    self.db_context = self.ds_in.db_context
    self.run_context = self.db_context.runContext
    self.run_context.debug( "processing control file" )
    if not table_exists( self.ds_in ):
        raise ValueError( "Input dataset {} does not exist".format( self.ds_in ) )

    # Read the control file
    # SAS 3-9
    if self.use_excel:
        reader = SafeExcelReader( self.run_context, self.agg_ds, self.agg_sheet )
        self.agg_data = [ row for row in reader.getRows() ]
    else:
        self.agg_ds = get_table_spec( self.agg_ds, self.db_context )
        self.agg_data = dump( self.agg_ds )

    # Validate the control file columns
    # SAS 10-28
    missing_vars = set()
    for var_name in [ 'outputvar', 'inputvar', 'targetlevel', 'targetid', 'wheret',
                      'wheret_value', 'parentlevel', 'parentid', 'wherep',
                      'wherep_value', 'critval' ]:
        if var_name not in self.agg_data[0]:
            missing_vars.add( var_name )
    if missing_vars:
        raise ValueError( "TTest control sheet lacks required columns: {:', '}".format(
                Joiner( missing_vars ) ) )

    # Validate existence of requested columns
    # SAS 29-86
    for row in self.agg_data:
        if row.wheret is None:
            row.wheret = []
        else:
            row.wheret = [ x.strip().lower() for x in row.wheret.strip().split( '*' ) ]
        if row.wherep is None:
            row.wherep = []
        else:
            row.wherep = [ x.strip().lower() for x in row.wherep.strip().split( '*' ) ]
        if row.wheret_value is None:
            row.wheret_value = []
        else:
            row.wheret_value = [ x.strip() for x in row.wheret_value.strip().split( ' ' ) ]
        if row.wherep_value is None:
            row.wherep_value = []
        else:
            row.wherep_value = [ x.strip() for x in row.wherep_value.strip().split( ' ' ) ]
        row.inputvar = row.inputvar.lower().strip()
        row.targetid = row.targetid.lower().strip()
        row.parentid = row.parentid.lower().strip()
        row.targetlevel = row.targetlevel.lower().strip()
        row.parentlevel = row.parentlevel.lower().strip()
        for var_name in ( row.wheret + row.wherep
                          + [ row.inputvar, row.targetid, row.parentid ] ):
            if var_name != '' and var_name not in self.ds_in:
                missing_vars.add( var_name )
    if missing_vars:
        raise ValueError( "TTest input data lacks required variables: {:', '}".format(
                Joiner( missing_vars ) ) )

    # Sort control data
    # SAS 87-90
    self.agg_data.sort( key=lambda row: ( row.targetlevel, row.parentlevel ) )

    # Check for consistency across "target" and "parent" variables.
    # SAS 91-222
    last_targetlevel = _NONE_LEVEL
    last_parentlevel = _NONE_LEVEL
    self.target_levels = []
    messages = []
    for row in self.agg_data:
        wheret = tuple( row.wheret )
        wheret_value = tuple( row.wheret_value )
        if len( wheret ) != len( wheret_value ):
            messages.append(
                'Number of wheret_value items must match number of wheret items ("{0}" vs "{1}")'.format(
                    row.wheret, row.wheret_value ) )
        if row.targetlevel != last_targetlevel.level:
            last_targetlevel = LevelData( row.targetlevel, row.targetid, wheret, wheret_value )
            self.target_levels.append( last_targetlevel )
            last_parentlevel = _NONE_LEVEL
            # Create an output table in which to accumulate the results
            table_name = 'ttest_' + row.targetlevel
            last_targetlevel.output_table = TableSpec( self.db_context, table_name )
            last_targetlevel.output_table.add( self.ds_in[ row.targetid ].clone() )
        else:
            last_targetlevel.check( row.targetid, wheret, messages )
        wherep = tuple( row.wherep )
        wherep_value = tuple( row.wherep_value )
        if len( wherep ) != len( wherep_value ):
            messages.append(
                'Number of wherep_value items must match number of wherep items ("{0}" vs "{1}")'.format(
                    row.wherep, row.wherep_value ) )
        if row.parentlevel != last_parentlevel.level:
            last_parentlevel = LevelData( row.parentlevel, row.parentid, wherep, wherep_value )
            last_targetlevel.contents.append( last_parentlevel )
        else:
            last_parentlevel.check( row.parentid, wherep, messages )
        last_parentlevel.contents.append( row )
        last_targetlevel.output_table.add( FieldSpec( row.outputvar, 'TINYINT' ) )
        try:
            row.critval = float( row.critval )
            if not MIN_CRITVAL <= row.critval <= MAX_CRITVAL:
                messages.append( "Bad critical value {} is not between {} and {}".format(
                        row.critval, MIN_CRITVAL, MAX_CRITVAL ) )
        except ValueError:
            messages.append( "Critical value {} is not a float".format( row.critval ) )
        try:
            row.outputvar = db_identifier_quote( row.outputvar )
        except ValueError:
            messages.append( "Output variable name {} is not a valid database identifier".format(
                    row.outputvar ) )
        try:
            row.targetlevel = db_identifier_quote( row.targetlevel )
        except ValueError:
            messages.append( "Target level name {} is not a valid database identifier".format(
                    row.targetlevel ) )
    for message in messages:
        self.run_context.error( message )
    if messages:
        raise ValueError( "Invalid inputs to ttest macro. See log for details" )
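# For orientation, a hypothetical control-sheet row of the shape readAggData()
# expects (the values are illustrative, not from a real aggregation sheet):
#
#   outputvar    inputvar    targetlevel  targetid  wheret         wheret_value
#   school_flag  scalescore  school       bcrxid    grade*subject  10 R
#
#   parentlevel  parentid  wherep  wherep_value  critval
#   district     dcrxid    grade   10            1.96
#
# After parsing, row.wheret == ['grade', 'subject'] and
# row.wheret_value == ['10', 'R'], which is why the two lists must have the
# same length.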
def testJoiner(self):
    cols = Joiner( ['column_1', 'column_2', 'column_3'] )
    out = "SELECT {cols:delimiter=','} FROM {table}".format( cols=cols, table="table_1" )
    self.assertEqual( 'SELECT column_1,column_2,column_3 FROM table_1', out,
                      'Wrong output \'{}\''.format( out ) )
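# The three Joiner tests pin down the observable contract of Joiner.__format__.
# A minimal re-implementation sketch consistent with those tests (the real class
# lives elsewhere and surely handles more, e.g. the bare "{:', '}" delimiter
# form used in readAggData and FieldSpec items):
import re

class JoinerSketch(object):
    def __init__(self, *sequences):
        # Flatten all input sequences into a single list of items
        self.items = [x for seq in sequences for x in seq]

    def __format__(self, spec):
        delimiter, item, itemfmt = ',', None, None
        # Parse "key='value'" pairs such as delimiter=',' or itemfmt='A.{}'
        for key, value in re.findall(r"(\w+)='([^']*)'", spec):
            if key == 'delimiter':
                delimiter = value
            elif key == 'item':
                item = value
            elif key == 'itemfmt':
                itemfmt = value
        if itemfmt is None:
            formatted = [str(x) for x in self.items]
        elif item is None:
            # Positional item format, e.g. itemfmt='A.{}'
            formatted = [itemfmt.format(x) for x in self.items]
        else:
            # Named item format, e.g. item='col', itemfmt='A.{col}'
            formatted = [itemfmt.format(**{item: x}) for x in self.items]
        return delimiter.join(formatted)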
def odeFilesMain(formatTable, stateTable, districtTable, runContextName):
    # Output a text and csv file for each dcrxid (i.e. 000123.csv):
    #   select data where dcrxid = this district's dcrxid
    #   select entire dataset (order by dcrxid, grade, ucrxlnm, ucrxfnm)
    runContext = RunContext(runContextName)   # assumed, mirroring the worker processes
    dbContext = runContext.getDBContext()
    var_names = dbContext.execute(
        "SELECT variable FROM {formatTable} WHERE [value type]='variable' order by start"
        .format(formatTable=formatTable))
    var_names = [v[0] for v in var_names]
    # Check if variables listed in excel file exist in data - raise exception if they do not
    rows0 = dbContext.execute(
        "SELECT TOP 1 {cols} FROM {districtTable}".format(
            cols=Joiner(var_names), districtTable=districtTable))
    rows0 = dbContext.execute(
        "SELECT TOP 1 {cols} FROM {stateTable}".format(
            cols=Joiner(var_names), stateTable=stateTable))
    districts = dbContext.execute(
        "SELECT DISTINCT dcrxid FROM {districtTable} order by dcrxid".format(
            districtTable=districtTable))
    lengths = dbContext.execute(
        "SELECT length FROM {formatTable} order by start".format(formatTable=formatTable))
    fields = dbContext.execute(
        "SELECT variable FROM {formatTable} WHERE variable <> 'dcrxid' order by start"
        .format(formatTable=formatTable))
    types = dbContext.execute(
        "SELECT [value type] FROM {formatTable} order by start".format(formatTable=formatTable))
    const_names = dbContext.execute(
        "SELECT variable FROM {formatTable} WHERE [value type]<>'variable' order by start"
        .format(formatTable=formatTable))
    starts = dbContext.execute(
        "SELECT start FROM {formatTable} order by start".format(formatTable=formatTable))
    outformats = dbContext.execute(
        "SELECT [out format] FROM {formatTable} order by start".format(formatTable=formatTable))
    names = dbContext.execute(
        "SELECT [ODE col name] FROM {formatTable} WHERE variable <> 'dcrxid' order by start"
        .format(formatTable=formatTable))
    districts = [d[0] for d in districts]
    lengths = [l[0] for l in lengths]
    fields = [t[0] for t in fields]
    types = [t[0] for t in types]
    const_names = [c[0] for c in const_names]
    starts = [s[0] for s in starts]
    outformats = [o[0] for o in outformats]
    names = [n[0] for n in names]
    #outputDistrictFiles()
    #outputStateFile()
    #ps_state = Process(target=outputStateFile, args=(outputDir, runContextName, stateTable, fields, starts, names, lengths, types, var_names, outformats))
    ps = [Process(target=outputStateFile,
                  args=(i, outputDir, runContextName, stateTable, fields, starts,
                        names, lengths, types, var_names, outformats))
          for i, x in enumerate(range(NUM_PROCESSES))]
    #ps_state.start()
    [p.start() for p in ps]
    [p.join() for p in ps]
    # Concatenate the per-process state files into one
    fh = open(outputDir + "WholeState.txt", "w")
    for i in range(NUM_PROCESSES):
        f = open(outputDir + "WholeState%s.txt" % i, "r")
        fh.write(f.read())
        f.close()
    fh.close()
    print "STATE FILE COMPLETE " + str(datetime.datetime.now())
    q = Queue()
    lock = Lock()
    [q.put(x) for x in districts]
    ps = [Process(target=outputDistrictFiles,
                  args=(q, i, lock, outputDir, runContextName, districtTable, fields,
                        starts, names, lengths, types, var_names, outformats))
          for i, x in enumerate(range(NUM_PROCESSES))]
    [p.start() for p in ps]
    [p.join() for p in ps]
def execute(self):
    # Validate inputs
    self.validate()

    # Delete output tables if they exist
    drop_table_if_exists(self)
    if self.fuzzy_report_table:
        drop_table_if_exists(self.fuzzy_report_table)
    if self.left_remain_table:
        drop_table_if_exists(self.left_remain_table)
    if self.right_remain_table:
        drop_table_if_exists(self.right_remain_table)

    # Scan for illegal duplication of required keys in both tables
    left_dup_table, left_reject_table = self._process_required_key_dups_and_missing(
        self.db_context, self.left_input_table, 'left', self.allow_dups_left)
    right_dup_table, right_reject_table = self._process_required_key_dups_and_missing(
        self.db_context, self.right_input_table, 'right', self.allow_dups_right)

    # We will create an initial table that contains only the required keys,
    # optional keys, fuzzy keys, foreign keys and the duplicate detection columns
    merge_table, left_fields, right_fields = self._create_merge_table()

    # If necessary, remove duplicates that appear in both tables
    if self.allow_dups_left and self.allow_dups_right and not self.allow_dups_both:
        with get_temp_table(self.db_context) as full_dup_table:
            query = _DUPS_BOTH_QUERY.format(
                left_dup_table=left_dup_table,
                right_dup_table=right_dup_table,
                full_dup_table=full_dup_table,
                key=Joiner(self.required_merge_keys))
            self.db_context.executeNoResults(query)
            query = _MARK_DUPS_BOTH_QUERY.format(
                dup_table=left_dup_table,
                full_dup_table=full_dup_table,
                key=Joiner(self.required_merge_keys))
            self.db_context.executeNoResults(query)
            self._move_rejects(left_dup_table, left_reject_table, 0,
                               'has_dups_both<>0',
                               "'Duplicate required key on both sides'")
            query = _MARK_DUPS_BOTH_QUERY.format(
                dup_table=right_dup_table,
                full_dup_table=full_dup_table,
                key=Joiner(self.required_merge_keys))
            self.db_context.executeNoResults(query)
            self._move_rejects(right_dup_table, right_reject_table, 0,
                               'has_dups_both<>0',
                               "'Duplicate required key on both sides'")

    # Perform the first merge (required key)
    query = _FIRST_MERGE_QUERY.format(
        merge_table=merge_table,
        required_key=Joiner(self.required_merge_keys),
        foreign_key_l=Joiner(merge_table.foreign_keys[0]),
        foreign_key_r=Joiner(merge_table.foreign_keys[1]),
        left_dup_table=left_dup_table,
        right_dup_table=right_dup_table)
    self.db_context.executeNoResults(query)
    left_dup_table.drop()
    right_dup_table.drop()

    # Remove rejects after the first merge
    self._move_rejects(merge_table, left_reject_table, 0, 'has_dups_r IS NULL',
                       "'No required key match'")
    self._move_rejects(merge_table, right_reject_table, 1, 'has_dups_l IS NULL',
                       "'No required key match'")

    # Bring in optional and fuzzy keys
    if len(left_fields) > 0:
        query = _ADD_MORE_KEYS_QUERY.format(
            fields=Joiner(left_fields),
            merge_table=merge_table,
            from_table=self.left_input_table,
            side='left',
            required_key=Joiner(merge_table.foreign_keys[0]))
        self.db_context.executeNoResults(query)
    if len(right_fields) > 0:
        query = _ADD_MORE_KEYS_QUERY.format(
            fields=Joiner(right_fields),
            merge_table=merge_table,
            from_table=self.right_input_table,
            side='right',
            required_key=Joiner(merge_table.foreign_keys[1]))
        self.db_context.executeNoResults(query)
    key_fields = Joiner(self.required_merge_keys, merge_table.foreign_keys[0],
                        merge_table.foreign_keys[1])

    # Flag matches for rejection based on optional keys
    if len(self.optional_merge_keys) > 0:
        indices = Joiner(range(1, len(self.optional_merge_keys) + 1))
        reason_expression = _OPTIONAL_MISMATCH_MESSAGE.format(indices=indices)
        query = _OPTIONAL_MISMATCH_QUERY.format(
            merge_table=merge_table,
            indices=indices,
            reason_expression=reason_expression)
        self.db_context.executeNoResults(query)

    # Flag matches for rejection based on fuzzy keys
    if len(self.fuzzy_merge_keys) > 0:
        indices = Joiner(range(1, len(self.fuzzy_merge_keys) + 1))
        query = _FUZZY_MATCH_QUERY.format(merge_table=merge_table, indices=indices,
                                          schema=self.db_context.schema)
        self.db_context.executeNoResults(query)

        # Create fuzzy report table
        if self.fuzzy_report_table is not None:
            query = _FUZZY_MATCH_REPORT_QUERY.format(
                key_fields=key_fields,
                indices=indices,
                report_table=self.fuzzy_report_table,
                merge_table=merge_table)
            self.db_context.executeNoResults(query)

        # Drop fuzzy mismatches
        reason_expression = _FUZZY_MISMATCH_MESSAGE.format(indices=indices)
        query = _FUZZY_MISMATCH_QUERY.format(
            merge_table=merge_table,
            thresholds=Joiner(self.similarity_thresholds),
            schema=self.db_context.schema,
            reason_expression=reason_expression)
        self.db_context.executeNoResults(query)

    # Move keys rejected due to optional or fuzzy matches
    reason_expression = "{fld:qualified}".format(fld=merge_table['merge_report'])
    self._copy_rejects(merge_table, left_reject_table, 0, '(reject<>0)', reason_expression)
    self._copy_rejects(merge_table, right_reject_table, 1, '(reject<>0)', reason_expression)
    self._delete_rows(merge_table, '(reject<>0)')

    # Meld columns in main merge table, including data columns that did not
    # participate in the merge
    self.left_input_table.alias = "L"
    self.right_input_table.alias = "R"
    merge_table.alias = "A"
    key_field_names = [x.field_name for x in key_fields]
    original_fields = self[:]
    meld_fields = [x for x in self if x.field_name not in key_field_names]
    for key_field in key_fields:
        if key_field not in self:
            self.add(key_field.clone())
    self.db_context.executeNoResults(self.definition)
    query = _MELD_FIELDS_QUERY.format(
        merge_table=merge_table,
        out_table=self,
        meld_fields=Joiner(meld_fields),
        key_fields=key_fields,
        left_table=self.left_input_table,
        left_key=Joiner(merge_table.foreign_keys[0]),
        right_table=self.right_input_table,
        right_key=Joiner(merge_table.foreign_keys[1]))
    self.db_context.executeNoResults(query)

    # Add non-matched records for outer joins
    if self.join_type in (JOIN_TYPE_LEFT, JOIN_TYPE_FULL):
        query = _RESTORE_REJECTS_QUERY.format(
            out_table=self,
            reject_table=left_reject_table,
            input_table=self.left_input_table,
            fields=Joiner(original_fields),
            foreign_key=Joiner(left_reject_table.foreign_keys[0]),
            side='left')
        self.db_context.executeNoResults(query)
    if self.join_type in (JOIN_TYPE_RIGHT, JOIN_TYPE_FULL):
        query = _RESTORE_REJECTS_QUERY.format(
            out_table=self,
            reject_table=right_reject_table,
            input_table=self.right_input_table,
            fields=Joiner(original_fields),
            foreign_key=Joiner(right_reject_table.foreign_keys[0]),
            side='right')
        self.db_context.executeNoResults(query)

    # Bring into the remainder tables the data columns that did not participate
    # in the merge
    if self.left_remain_table is not None and self.join_type in (JOIN_TYPE_RIGHT,
                                                                 JOIN_TYPE_INNER):
        # Create the table
        del self.left_remain_table[:]
        self.left_remain_table.add_all(left_reject_table.foreign_keys[0])
        self.left_remain_table.add_all(meld_fields)
        self.left_remain_table.add(left_reject_table.merge_report.clone())
        self.db_context.executeNoResults(self.left_remain_table.definition)
        left_reject_table.alias = "A"
        query = _REMAIN_TABLE_QUERY.format(
            key_fields=Joiner(left_reject_table.foreign_keys[0]),
            data_fields=Joiner(meld_fields),
            remain_table=self.left_remain_table,
            reject_table=left_reject_table,
            input_table=self.left_input_table,
            side='left')
        self.db_context.executeNoResults(query)
    if self.right_remain_table is not None and self.join_type in (JOIN_TYPE_LEFT,
                                                                  JOIN_TYPE_INNER):
        del self.right_remain_table[:]
        self.right_remain_table.add_all(right_reject_table.foreign_keys[0])
        self.right_remain_table.add_all(meld_fields)
        self.right_remain_table.add(right_reject_table.merge_report.clone())
        self.db_context.executeNoResults(self.right_remain_table.definition)
        right_reject_table.alias = "A"
        query = _REMAIN_TABLE_QUERY.format(
            key_fields=Joiner(right_reject_table.foreign_keys[0]),
            data_fields=Joiner(meld_fields),
            remain_table=self.right_remain_table,
            reject_table=right_reject_table,
            input_table=self.right_input_table,
            side='right')
        self.db_context.executeNoResults(query)
    left_reject_table.drop()
    right_reject_table.drop()
    merge_table.drop()