예제 #1
0
 def confirm_parents_nonmissing( self, summary_table ):
     """Raise ValueError if targets in ``summary_table`` lack parent groups.

     Runs _MISSING_PARENTS_QUERY (which returns a single scalar count) against
     the given summary table and raises when the count exceeds 1.
     (SAS 209-220)
     """
     query = _MISSING_PARENTS_QUERY.format(
             target_summary_table=summary_table,
             parent_group_cols = Joiner( self.parent_group_cols ),
             target_group_cols = Joiner( self.target_group_cols ) )
     ans = self.db_context.execute( query )
     n = ans[0][0]
     # NOTE(review): threshold is > 1, not > 0 -- presumably the query's count
     # is offset by one; confirm against _MISSING_PARENTS_QUERY before changing.
     if n > 1:
         # BUG FIX: the message previously read "Some target groups contain
         # more than one parent group", copy-pasted from
         # confirm_groups_consistent(); this check is about MISSING parents.
         raise ValueError( "Some target groups are missing a parent group" )
예제 #2
0
 def confirm_groups_consistent( self, summary_table ):
     """Raise ValueError if any target group maps to more than one parent group.

     Runs _CONFLICTING_GROUPS_QUERY (single scalar result) against the given
     summary table. (SAS 198-207)
     """
     sql = _CONFLICTING_GROUPS_QUERY.format(
             target_summary_table=summary_table,
             all_group_cols = Joiner( self.parent_group_cols, self.extra_target_group_cols ),
             target_group_cols = Joiner( self.target_group_cols ) )
     result = self.db_context.execute( sql )
     conflict_count = result[0][0]
     if conflict_count > 1:
         raise ValueError( "Some target groups contain more than one parent group" )
예제 #3
0
    def create_output( self, parent_summary, target_summary ):
        """Build ``self.ds_out`` from the parent and target summary tables.

        Adds the parent/extra-target group columns to ``self.ds_out``, stages
        the t-test computation in a temp table through the ordered steps
        _TTEST_QUERY_1.._TTEST_QUERY_7 (each step reads columns written by the
        previous one, so the order must not change), then creates the real
        output table and copies the finished columns in with
        _FINAL_OUTPUT_QUERY.
        """
        self.ds_out.add_all( self.parent_group_cols )
        self.ds_out.add_all( self.extra_target_group_cols )

        with get_temp_table(self.db_context) as ds_temp:
            # Working table: the output columns plus intermediate statistics.
            ds_temp.add_all( self.ds_out )
            ds_temp.add( FieldSpec( field_name="numerator", basic_type="FLOAT" ) )
            ds_temp.add( FieldSpec( field_name="ttest_se", basic_type="FLOAT" ) )
            ds_temp.add( FieldSpec( field_name="ttest_value", basic_type="FLOAT" ) )
            ds_temp.add( FieldSpec( field_name="p_css", basic_type="FLOAT" ) )
            ds_temp.add( FieldSpec( field_name="t_css", basic_type="FLOAT" ) )
            ds_temp.add( FieldSpec( field_name="p_t", basic_type="FLOAT" ) )
            ds_temp.add( FieldSpec( field_name="t_t", basic_type="FLOAT" ) )
            ds_temp.add( FieldSpec( field_name="p_n", basic_type="FLOAT" ) )
            ds_temp.add( FieldSpec( field_name="t_n", basic_type="FLOAT" ) )
            self.db_context.executeNoResults( ds_temp.definition )
            
            # Step 1 joins the parent and target summaries into the temp table.
            query = _TTEST_QUERY_1.format(
                    ds_out=ds_temp,
                    group_cols=Joiner( self.parent_group_cols, self.extra_target_group_cols ),
                    parent_summary=parent_summary,
                    target_summary=target_summary,
                    parent_group_cols=Joiner( self.parent_group_cols),
                    target_group_cols=Joiner( self.target_group_cols) )
            self.db_context.executeNoResults( query )
            
            query = _TTEST_QUERY_2.format( ds_out=ds_temp )
            self.db_context.executeNoResults( query )
    
            query = _TTEST_QUERY_3.format( ds_out=ds_temp )
            self.db_context.executeNoResults( query )
    
            query = _TTEST_QUERY_4.format( ds_out=ds_temp )
            self.db_context.executeNoResults( query )
    
            # Steps 5 and 6 apply the configured critical value; step 6 also
            # writes the final output column.
            query = _TTEST_QUERY_5.format( ds_out=ds_temp,
                                           critval=self.critical_value )
            self.db_context.executeNoResults( query )
    
            query = _TTEST_QUERY_6.format( ds_out=ds_temp,
                                           critval=self.critical_value,
                                           output_col=self.output_col )
            self.db_context.executeNoResults( query )
            
            # Step 7 post-processes the output column using
            # self.reciprocal_round_value (presumably rounding -- confirm
            # against _TTEST_QUERY_7).
            query = _TTEST_QUERY_7.format( ds_out=ds_temp,
                                           rrv=self.reciprocal_round_value,
                                           output_col=self.output_col )
            self.db_context.executeNoResults( query )
            
            # Materialize the real output table and copy the results across.
            self.db_context.executeNoResults( self.ds_out.definition )
            query = _FINAL_OUTPUT_QUERY.format( ds_out=self.ds_out,
                                                ds_temp=ds_temp,
                                                columns=Joiner( self.ds_out ) )
            self.db_context.executeNoResults( query )
 def testJoinerItemFormat(self):
     """Joiner format spec: named 'item' key with a per-item 'itemfmt' template.

     '{{col}}' escapes to '{col}' so each column renders via the itemfmt
     template as 'A.<column>'.  The embedded newlines and indentation inside
     the triple-quoted string are part of the format spec that Joiner's
     __format__ must tolerate -- do not reflow this literal.
     """
     cols = Joiner( ['column_1', 'column_2'] )
     out = """SELECT {cols:delimiter=',',
                     item='col',
                     itemfmt='{alias}.{{col}}'} FROM {table} AS {alias}""".format( cols=cols , table="table_1", alias="A" )
     self.assertEqual( 'SELECT A.column_1,A.column_2 FROM table_1 AS A',
                       out, 'Wrong output \'{}\''.format( out ) )
예제 #5
0
def outputDistrictFiles(q, i, lock, outputDir, runContextName, districtTable,
                        fields, starts, names, lengths, types, var_names,
                        outformats):
    """Worker process: drain district IDs from queue ``q`` and write one
    fixed-width .txt file and one .csv file per district into ``outputDir``.

    Python 2 code (print statement).  ``i`` is the worker index, used only in
    the shutdown log line.  ``lock`` makes the q.empty()/q.get() pair atomic
    across workers.
    """
    runContext = RunContext(runContextName)
    dbContext = runContext.getDBContext()
    while (True):
        # Atomically test-and-take the next district; exit when the queue is
        # drained.
        lock.acquire()
        if q.empty():
            lock.release()
            print "Thread %s is OVER " % i + str(datetime.datetime.now())
            return
        else:
            district = q.get()
            lock.release()

        fh = open(outputDir + district + ".txt", "w")
        fhc = open(outputDir + district + ".csv", "w")
        # NOTE(review): the district value is interpolated into the SQL by
        # str.format without escaping -- assumes district IDs are trusted.
        rows = dbContext.execute(
            "SELECT {cols} FROM {districtTable} WHERE dcrxid='{district}' ORDER BY dcrxid, grade, ucrxlnm, ucrxfnm, lithocode, studentmid"
            .format(cols=Joiner(var_names),
                    district=district,
                    districtTable=districtTable))
        # CSV header row: emit each name once, skipping consecutive repeats.
        lastname = ''
        for n in names:
            if n != lastname:
                fhc.write(n.upper() + ',')
            lastname = n
        fhc.write('\n')
        writeToFile(rows, fh, fhc, 0, fields, starts, names, lengths, types,
                    var_names, outformats)
        fh.close()
        fhc.close()
예제 #6
0
    def _process_required_key_dups_and_missing(self, db_context, input_table,
                                               side, allow_dups):
        """Flag duplicate/missing required merge keys on one side of the merge.

        Builds two temp tables from ``input_table``:
          * a "dup" table keyed back to the input rows, with has_dups /
            has_missing / has_dups_both flags populated by
            _FIND_DUPLICATES_QUERY;
          * a "reject" table with a merge_report text column.
        Rows with missing keys are always moved to the rejects table; rows
        with duplicate keys are moved only when ``allow_dups`` is false.

        Returns (dup_table, reject_table); the caller owns both temp tables.
        """
        # Define duplicates table
        dup_table = get_temp_table(db_context)
        dup_table.create_foreign_key(input_table, True, 'fk_{}_'.format(side))
        dup_table.add_all(self.required_merge_keys)
        dup_table.add(FieldSpec(basic_type="TINYINT", field_name="has_dups"))
        dup_table.add(FieldSpec(basic_type="TINYINT",
                                field_name="has_missing"))
        dup_table.add(
            FieldSpec(basic_type="TINYINT", field_name="has_dups_both"))
        db_context.executeNoResults(dup_table.definition)

        # Populate table
        query = _FIND_DUPLICATES_QUERY.format(
            dup_table=dup_table,
            input_table=input_table,
            foreign_key=Joiner(dup_table.foreign_keys[0]),
            required_key=Joiner(self.required_merge_keys),
            side=side)
        db_context.executeNoResults(query)

        # Define rejects table
        reject_table = get_temp_table(db_context)
        reject_table.create_foreign_key(input_table, True,
                                        'fk_{}_'.format(side))
        reject_table.add_all(self.required_merge_keys)
        reject_table.add(
            FieldSpec(basic_type="NVARCHAR",
                      data_length=4000,
                      field_name="merge_report"))
        db_context.executeNoResults(reject_table.definition)

        # Move missing keys to rejects table
        self._move_rejects(dup_table, reject_table, 0, "has_missing > 0",
                           "'Missing required key on {}'".format(side))

        # If required, move duplicates to rejects table
        if not allow_dups:
            self._move_rejects(dup_table, reject_table, 0, "has_dups > 0",
                               "'Duplicate required key on {}'".format(side))

        return dup_table, reject_table
예제 #7
0
 def _copy_rejects(self, from_table, reject_table, key_index, condition,
                   reason_expression):
     """Copy rows of ``from_table`` matching ``condition`` into
     ``reject_table``, tagging them with ``reason_expression``.

     ``key_index`` selects which foreign key of ``from_table`` is carried
     along with the required merge keys.
     """
     key_joiner = Joiner(from_table.foreign_keys[key_index],
                         self.required_merge_keys)
     sql = _ADD_REJECTS_QUERY.format(from_table=from_table,
                                     reject_table=reject_table,
                                     keys=key_joiner,
                                     reason_expression=reason_expression,
                                     condition=condition)
     self.db_context.executeNoResults(sql)
 def input_function( params, table ):
     """Build a temp table holding the first two observations of selected
     multi-person target groups in ``table``.

     NOTE(review): the signature has no ``self`` yet the body references
     ``self.db_context`` -- presumably this function is defined inside a
     method and closes over ``self``; confirm against the enclosing scope.
     """
     # Pick all targets that have more than one person
     with get_temp_table( self.db_context ) as temp_table_1, get_temp_table( self.db_context ) as temp_table_2:
         query = """
         SELECT DISTINCT {parentgroups}, {targetgroups}
         INTO {temp_table_1:qualified}
         FROM {table:qualified}
         WHERE ({where}) AND ({wheret})
             AND {parentgroups} IS NOT NULL
             AND {targetgroups} IS NOT NULL
         GROUP BY {parentgroups}, {targetgroups}
         HAVING COUNT(1) > 1
         """.format( table=table, temp_table_1=temp_table_1, **params )
         self.db_context.executeNoResults(query)
           
         # From those, pick the first target in each parent
         query= """
         SELECT {parentgroups}, {targetgroups},
         ROW_NUMBER() OVER( PARTITION BY {parentgroups} ORDER BY {targetgroups} ) AS r1
         INTO {temp_table_2:qualified}
         FROM {temp_table_1:qualified}
         """.format( temp_table_1=temp_table_1, temp_table_2=temp_table_2, **params )
         self.db_context.executeNoResults(query)
           
         # For each selected target, pick the first two observations.
         # in_ds is deliberately NOT opened as a context manager: it must
         # outlive the with-block (temp_table_1/2 are dropped on exit) so the
         # caller can consume it.
         in_ds = get_temp_table( self.db_context )
         query = """
         SELECT {columns:itemfmt='C.{{}}'}
         INTO {in_ds}
         FROM (
             SELECT {columns:itemfmt='A.{{}}'},
             ROW_NUMBER() OVER( PARTITION BY A.{parentgroups}, A.{targetgroups} ORDER BY A.{targetgroups} ) AS r2
             FROM (
                 SELECT {columns}
                 FROM {table:qualified}
                 WHERE ({where}) AND ({wheret})
                     AND {parentgroups} IS NOT NULL
                     AND {targetgroups} IS NOT NULL
             ) AS A INNER JOIN (
                 SELECT {parentgroups}, {targetgroups}
                 FROM {temp_table_2:qualified}
                 WHERE r1=1
             ) AS B
             ON A.{parentgroups}=B.{parentgroups} AND A.{targetgroups}=B.{targetgroups}
         ) AS C
         WHERE C.r2<=2
         """.format( in_ds=in_ds, temp_table_2=temp_table_2,
                     table=table,
                     columns=Joiner( table ), **params )
         self.db_context.executeNoResults(query)
     # Load column metadata for the survivor table before returning it.
     in_ds.populate_from_connection()
     return in_ds
예제 #9
0
 def generate_summary_stats( self, where_expression, *args ):
     """Create and return a temp table of summary statistics over ``self.ds_in``.

     ``where_expression`` filters the input rows; ``*args`` supplies one or
     more sequences of grouping columns, combined via Joiner.
     (SAS 188-192; 228-233)
     """
     group_cols = Joiner( *args )
     summary_table = get_temp_table( self.db_context )
     query = _SUMMARY_QUERY.format( input_col=self.input_col,
                                    summary_table=summary_table,
                                    ds_in=self.ds_in,
                                    group_cols=group_cols,
                                    where=where_expression )
     self.db_context.executeNoResults( query )
     # BUG FIX: the call parentheses were missing, so this line was a no-op
     # attribute access and the table's column metadata was never loaded
     # (siblings such as assemble_input_data_n_in_group call it correctly).
     summary_table.populate_from_connection()
     return summary_table
예제 #10
0
 def execute( self ):
     """Run every configured t-test and accumulate results per target level.

     For each target level: recreate its output table and fill it with target
     ids; then for each parent level and each input-variable row, extract the
     needed columns into a shared temp input table, run a TTestLevel, and
     merge its output into the target level's output table.
     """
     self.run_context.debug( "Running ttests" )
     with get_temp_table( self.db_context ) as ttest_input_table, get_temp_table( self.db_context ) as ttest_output_table:
         # One reusable TTestLevel; its per-run attributes are reassigned
         # inside the innermost loop before each execute().
         ttest_level = TTestLevel( ds_in=ttest_input_table, ds_out=ttest_output_table, db_context=self.db_context,
                                   round_value=1 )
         
         for target_level in self.target_levels:
             drop_table_if_exists( target_level.output_table )
             self.db_context.executeNoResults( target_level.output_table.definition )
             where_t = target_level.get_where_expression()
             self.db_context.executeNoResults( _FILL_OUTPUT_TABLE_QUERY.format(
                     output_table=target_level.output_table,
                     input_table=self.ds_in,
                     target_id=target_level.output_table[ target_level.id ],
                     where_t=where_t,
             ))
             # Accumulate the set of columns each extraction query needs:
             # target where-columns + id, then parent's, then the input var.
             vars_target = set( target_level.where )
             vars_target.add( target_level.id )
             for parent_level in target_level.contents:
                 where_p = parent_level.get_where_expression()
                 vars_parent = set( parent_level.where )
                 vars_parent.add( parent_level.id )
                 vars_parent.update( vars_target )
                 for row in parent_level.contents:
                     vars_row = set( vars_parent )
                     vars_row.add( row.inputvar )
                     
                     self.run_context.debug( "Running ttest for variable {} comparing {} to {}".format(
                             row.inputvar, target_level.level, parent_level.level ) )
                     # Rebuild the shared input table for this variable/pair.
                     drop_table_if_exists( ttest_input_table )
                     query = _GET_INPUT_VARIABLES_QUERY.format( ds_in=self.ds_in, ttest_input_table=ttest_input_table,
                             vars=Joiner( vars_row ),
                             where_t=where_t, where_p=where_p )
                     self.db_context.executeNoResults( query )
                     
                     # Configure the shared TTestLevel for this run.
                     ttest_level.input_col_name=row.inputvar
                     ttest_level.output_col_name=row.outputvar
                     ttest_level.critical_value=row.critval
                     ttest_level.target_group_cols=[ target_level.id ]
                     ttest_level.parent_group_cols=[ parent_level.id ]
                     ttest_level.target_where_expression = where_t
                     ttest_level.parent_where_expression = where_p
                     
                     ttest_level.execute()
                     
                     # Merge this run's output column into the level's table.
                     query = _ACCUMULATE_RESULTS_QUERY.format( output_table=target_level.output_table,
                             ttest_table=ttest_output_table, output_var=row.outputvar, target_id=target_level.id )
                     self.db_context.executeNoResults( query )
예제 #11
0
def outputStateFile(i, outputDir, runContextName, stateTable, fields, starts,
                    names, lengths, types, var_names, outformats):
    """Worker ``i`` of NUM_PROCESSES: write its slice of the whole-state
    result set to WholeState<i>.txt in ``outputDir``.

    Python 2 code (print statement).  Every worker runs the full SELECT and
    then slices its own portion of the rows.
    """
    runContext = RunContext(runContextName)
    dbContext = runContext.getDBContext()

    num_rows = dbContext.execute(
        "SELECT count(*) FROM {stateTable}".format(stateTable=stateTable))
    # Python 2 integer division: each worker handles ``length`` rows; the
    # last worker (i == NUM_PROCESSES - 1) also takes the remainder.
    length = num_rows[0][0] / NUM_PROCESSES

    print 'starting to make wholestate' + str(datetime.datetime.now())
    fh = open(outputDir + "WholeState%s.txt" % i, "w")
    rows = dbContext.execute(
        "SELECT {cols} FROM {stateTable} ORDER BY dcrxid, grade, ucrxlnm, ucrxfnm, lithocode, studentmid"
        .format(cols=Joiner(var_names), stateTable=stateTable))
    #writeToFile(rows, fh, None, 1, fields, starts, names, lengths, types, var_names, outformats)
    if i == NUM_PROCESSES - 1:
        writeToFile(rows[(length * i):], fh, None, 1, fields, starts, names,
                    lengths, types, var_names, outformats)
    else:
        writeToFile(rows[(length * i):(length * i + length)], fh, None, 1,
                    fields, starts, names, lengths, types, var_names,
                    outformats)
    fh.close()
    def assemble_input_data_n_in_group( self, n_in_group, params, table ):
        """Return a temp table with one row per target group, limited to the
        first ``n_in_group`` target groups within each parent group.

        ``params`` must supply the parentgroups/targetgroups/where/wheret
        format keys used by the SQL template below.
        """
        result_table = get_temp_table( self.db_context )
        # DENSE_RANK numbers the target groups within a parent (r1);
        # ROW_NUMBER picks a single representative row per group (r2=1).
        sql = """
        SELECT {columns:itemfmt='A.{{}}'}
        INTO {in_ds}
        FROM (
            SELECT {columns},
            DENSE_RANK() OVER( PARTITION BY {parentgroups} ORDER BY {targetgroups} ) AS r1,
            ROW_NUMBER() OVER( PARTITION BY {parentgroups}, {targetgroups} ORDER BY {targetgroups} ) AS r2
            FROM {table_name:qualified}
            WHERE ({where}) AND ({wheret})
                AND {parentgroups} IS NOT NULL
                AND {targetgroups} IS NOT NULL
        ) AS A
        WHERE A.r1<={n_in_group} AND A.r2=1
        """.format( in_ds=result_table,
                    n_in_group=n_in_group,
                    table_name=table,
                    columns=Joiner( table ),
                    **params )
        self.db_context.executeNoResults( sql )
        result_table.populate_from_connection()
        return result_table
    def runRC(version, tideLayoutFile, tideSpecFile, tideDataDirectory,
              tideDataTestCopy, tideDataOutputFile, masterListDTN, dbContext,
              runContextName):
        """End-to-end TIDE reconciliation run (version 1 or 2).

        Stages, gated by the testImport/testMerge/testOutput flags below:
        import the raw TIDE text files into SQL and normalize them; merge the
        score file against the TIDE data via MergeDef; run a long sequence of
        hand-written cleanup/rename/join queries producing ``rc<version>Final``;
        and, for version 1, write the fixed-width TIDE output file and
        optionally compare it byte-for-byte against a reference copy.

        Python 2 code (print statements).
        NOTE(review): ``runContext`` (passed to SafeExcelReader below) is not
        defined in this function -- presumably a global or closure variable;
        confirm against the enclosing scope.
        """
        testImport = 1
        testMerge = 1
        testOutput = 1
        rcOutputDTN = 'rc%sFinal' % str(version)

        if testImport:
            drop_tables([
                'TideLayout', 'TideFile', 'TideFile0', 'TideFile1',
                'TideFile2', 'TideFile3', 'TideFile4', 'TideFile5'
            ], dbContext)
            # Load the layout spreadsheet into the TideLayout table.
            reader = SafeExcelReader(runContext)
            reader.db_context = dbContext
            reader.filename = tideLayoutFile
            reader.sheetName = "OH Ach Pre-Reporting"
            reader.outputTable = "TideLayout"
            reader.createTable()

            # Import every .txt file in parallel: one process per file, each
            # writing its own TideFile<i> table.
            files = []
            os.chdir(tideDataDirectory)
            for tideDataFile in glob.glob("*.txt"):
                files.append(tideDataFile)
            q = Queue()
            [q.put(x) for x in files]
            ps = [
                Process(target=readTideFile,
                        args=(q, i, tideDataDirectory, runContextName))
                for i, x in enumerate(files)
            ]
            [p.start() for p in ps]
            [p.join() for p in ps]

            i = len(ps)
            query = "SELECT * INTO TideFile FROM ("
            query2 = ""
            #We created a table for every text file. Now, let's merge them together
            while i > 0:
                i -= 1
                query += "SELECT * FROM TideFile" + str(i) + " "
                if (i > 0):
                    query += " UNION ALL "
                else:
                    query += ") as tmp;\n"
                query2 += " DROP TABLE TideFile%s; " % (i, )
            query2 += " ALTER TABLE tidefile ALTER COLUMN bcrxnm_attend NVARCHAR(35); "
            dbContext.executeNoResults(query)
            dbContext.executeNoResults(query2)
            # Normalize gender codes, blank out redundant home-school columns,
            # drop filler columns, and NULL-out '.' placeholder values.
            query = """
            UPDATE TideFile SET ucrxgen=1 WHERE ucrxgen='F';
            UPDATE TideFile SET ucrxgen=2 WHERE ucrxgen='M';
            UPDATE TideFile SET migrant='' WHERE ucrxgen='0';
            UPDATE TideFile SET dcrxid_home='', bcrxid_home='', dcrxnm_home='', bcrxnm_home='' WHERE dcrxid_home=dcrxid_attend;
            UPDATE TideFile SET bcrxid_home='', bcrxnm_home='' WHERE bcrxid_attend=bcrxid_home;
            UPDATE TideFile SET dcrxid_home='', bcrxid_home='', dcrxnm_home='', bcrxnm_home='' WHERE dcrxid_home=bcrxid_attend;
            ALTER TABLE TideFile DROP COLUMN filler0, filler1, filler2, filler3, filler4, filler5, filler6, filler7, drops1, drops2, drops3, preid_num1, preid_num2;
            
            UPDATE TideFile SET grade=NULL WHERE grade='.'
            UPDATE TideFile SET dob_day=NULL WHERE dob_day='.'
            UPDATE TideFile SET dob_month=NULL WHERE dob_month='.'
            UPDATE TideFile SET dob_year=NULL WHERE dob_year='.'
            """
            dbContext.executeNoResults(query)
            # Version 1 keys on ucrx_preid; other versions key on lithocode.
            if version == 1:
                dbContext.executeNoResults(
                    " ALTER TABLE TideFile ALTER COLUMN ucrx_preid NVARCHAR(128) NOT NULL; "
                )
                dbContext.executeNoResults(
                    " ALTER TABLE TideFile ADD PRIMARY KEY (ucrx_preid); ")
            else:
                dbContext.executeNoResults(
                    " ALTER TABLE TideFile ALTER COLUMN lithocode NVARCHAR(128) NOT NULL; "
                )
                dbContext.executeNoResults(
                    " ALTER TABLE TideFile ADD PRIMARY KEY (lithocode); ")

        if testMerge:
            tables = [
                'ds1_rm1_RC1', 'ds2_rm2_RC1', 'fuzzy_RC1', 'merge1_RC1',
                'OGT_ScoreFile'
            ]
            drop_tables(tables, dbContext)

            query = """
            SELECT * INTO OGT_ScoreFile FROM intakeFinal;
            UPDATE OGT_ScoreFile SET dcrxid_home='', bcrxid_home='', dcrxnm_home='', bcrxnm_home='', distrtype_home='', schtype_home='' WHERE dcrxid_home=dcrxid_attend;
            UPDATE OGT_ScoreFile SET bcrxid_home='', bcrxnm_home='', schtype_home='' WHERE bcrxid_attend=bcrxid_home;
            UPDATE OGT_ScoreFile SET dcrxid_home='', bcrxid_home='', dcrxnm_home='', bcrxnm_home='', distrtype_home='', schtype_home='' WHERE dcrxid_home=bcrxid_attend;
            UPDATE OGT_ScoreFile SET dcrxnm_home='', distrtype_home='' WHERE dcrxid_home='';
            ALTER TABLE OGT_ScoreFile ALTER COLUMN id INT NOT NULL;
            ALTER TABLE OGT_ScoreFile ADD PRIMARY KEY (id);
            """
            dbContext.executeNoResults(query)

            ######## MERGE ########
            # Inner-join merge of score file (left) against TideFile (right);
            # non-matching rows land in the *_rm* remain tables.
            merge_def = MergeDef(dbContext)
            tideFile = dbContext.getTableSpec("TideFile")
            scoreFile = dbContext.getTableSpec("OGT_ScoreFile")
            merge_def.table_name = 'merge1_RC1'
            merge_def.left_input_table = scoreFile
            merge_def.right_input_table = tideFile
            merge_def.allow_dups_both = (False, )
            merge_def.join_type = JOIN_TYPE_INNER
            merge_def.fuzzy_report_table = 'fuzzy_RC1'
            merge_def.left_remain_table = 'ds1_rm1_RC1'
            merge_def.right_remain_table = 'ds2_rm2_RC1'
            read_spec_file(tideSpecFile, merge_def)
            merge_def.execute()

        tables = [
            'dcrxid_home', 'bcrxid_all', 'dcrxid_all', 'blanklist',
            'dcrxid_missing', rcOutputDTN, 'countyNameFmt', 'OGT_RC1_3',
            'OGT_RC1_2', 'OGT_RC1_1', 'OGT_P2', 'OGT_P1', 'merge_master1',
            'dedup_d2', 'dedup_b', 'dedup_d', 'finalResult_RC1', 'OGT_Tide'
        ]
        drop_tables(tables, dbContext)
        query = """
        ALTER TABLE merge1_RC1 DROP COLUMN primary1, fk_right_1, fk_left_1;
        ALTER TABLE ds1_rm1_RC1 DROP COLUMN merge_report, fk_left_1;
        """
        dbContext.executeNoResults(query)
        # Union merged + unmerged rows, apply hard-coded district remaps, and
        # stage three copies of the master list for de-duplication.
        query = """
        
        SELECT *
        INTO finalResult_RC1
        FROM (
        SELECT * FROM merge1_RC1
        UNION ALL
        SELECT * FROM ds1_rm1_RC1
        ) AS tmp;
        
        UPDATE finalResult_RC1 SET dcrxid_home='052555' WHERE dcrxid_home='053165';
        UPDATE finalResult_RC1 SET dcrxid_home='052555' WHERE dcrxid_home='053645';
        UPDATE finalResult_RC1 SET dcrxid_home='052514' WHERE dcrxid_home='052647';
        UPDATE finalResult_RC1 SET dcrxid_home='052548' WHERE dcrxid_home='053652';
        UPDATE finalResult_RC1 SET dcrxid_home='052530' WHERE dcrxid_home='053231';
        UPDATE finalResult_RC1 SET dcrxid_home='000129' WHERE dcrxid_home='092247';
        UPDATE finalResult_RC1 SET dcrxid_home='000129' WHERE dcrxid_home='064915';
        UPDATE finalResult_RC1 SET dcrxid_home='052514' WHERE dcrxid_home='053454';
        UPDATE finalResult_RC1 SET dcrxid_home='052563' WHERE dcrxid_home='053637';
        UPDATE finalResult_RC1 SET dcrxid_home='052514' WHERE dcrxid_home='052878';
        UPDATE finalResult_RC1 SET dcrxid_home='000129' WHERE dcrxid_home='090456';
        UPDATE finalResult_RC1 SET dcrxid_home=''       WHERE dcrxid_home='BBBBBB';
        
        SELECT *
        INTO dedup_d
        FROM {masterList} 
        SELECT *
        INTO dedup_d2
        FROM {masterList}
        
        SELECT *
        INTO dedup_b
        FROM {masterList}
        """.format(masterList=masterListDTN)
        dbContext.executeNoResults(query)
        if version == 2:
            query = """
                UPDATE finalResult_RC1 SET dcrxid_home='000129' WHERE dcrxid_home IN ('008071', '067629', '097923');
                UPDATE finalResult_RC1 SET dcrxid_attend='051284' WHERE dcrxid_attend='051292';
                UPDATE finalResult_RC1 SET dcrxid_attend='051060' WHERE dcrxid_attend='064998';
            """
            dbContext.executeNoResults(query)
        dbContext.executeNoResults(nodupkey("dedup_d", "dcrxid_c"))
        dbContext.executeNoResults(nodupkey("dedup_d2", "dcrxid_c"))
        dbContext.executeNoResults(nodupkey("dedup_b", "bcrxid_c"))
        # Rename dedup_d to district-home columns and dedup_b to
        # building-home columns for the joins below.
        query = """
        ALTER TABLE dedup_d DROP COLUMN bcrxid_c, schtype, source, bcrxnm
        EXEC SP_RENAME 'dedup_d.dcrxid_c', 'dcrxid_home', 'COLUMN'
        EXEC SP_RENAME 'dedup_d.distrtype', 'distrtype_home', 'COLUMN'
        EXEC SP_RENAME 'dedup_d.dcrxnm', 'dcrxnm_home', 'COLUMN'
        
        ALTER TABLE dedup_b DROP COLUMN dcrxid_c, distrtype, source, dcrxnm
        EXEC SP_RENAME 'dedup_b.bcrxid_c', 'bcrxid_home', 'COLUMN'
        EXEC SP_RENAME 'dedup_b.schtype', 'schtype_home', 'COLUMN'
        EXEC SP_RENAME 'dedup_b.bcrxnm', 'bcrxnm_home', 'COLUMN'
        ALTER TABLE finalResult_RC1 DROP COLUMN dcrxnm_home, distrtype_home;
        """
        dbContext.executeNoResults(query)
        query = """
        
        SELECT F.*, D.[dcxx_county] as [dcxx_county2], D.[dcrxnm_home], D.[distrtype_home]
        INTO merge_master1
        FROM finalResult_RC1 F
        LEFT JOIN dedup_d D ON (D.dcrxid_home = F.dcrxid_home);
        
        UPDATE merge_master1 
        SET dcxx_county=dcxx_county2 
        WHERE dcxx_county='' OR dcxx_county IS NULL;
        
        ALTER TABLE merge_master1 DROP COLUMN dcxx_county2, bcrxnm_home, schtype_home;
        """
        dbContext.executeNoResults(query)
        # NOTE(review): the UPDATE below uses "='' AND ... IS NULL", which can
        # never be true for a single value -- the matching query above uses OR.
        # Likely a bug; confirm intent before changing.
        query = """
        
        SELECT F.*, D.[dcxx_county] as [dcxx_county2], D.[bcrxnm_home], D.[schtype_home]
        INTO OGT_RC1_1
        FROM merge_master1 F
        LEFT JOIN dedup_b D ON (D.bcrxid_home = F.bcrxid_home)
        
        UPDATE OGT_RC1_1
        SET dcxx_county=dcxx_county2 
        WHERE dcxx_county='' AND dcxx_county IS NULL;
        
        ALTER TABLE OGT_RC1_1 DROP COLUMN dcxx_county2;
        """
        dbContext.executeNoResults(query)
        # Split home-schooled-building rows ('999999') from the rest and
        # re-attach attend-side names from the master list copies.
        query = """
        
        SELECT *
        INTO OGT_P2 
        FROM OGT_RC1_1
        WHERE bcrxid_attend='999999'
        SELECT *
        INTO OGT_P1 
        FROM OGT_RC1_1
        WHERE bcrxid_attend<>'999999' or bcrxid_attend is null;
        
        ALTER TABLE OGT_P1 DROP COLUMN bcrxnm_attend, dcrxnm_attend
        SELECT A.*, B.dcrxnm as dcrxnm_attend, B.bcrxnm as bcrxnm_attend
        INTO OGT_RC1_2
        FROM OGT_P1 A 
        LEFT JOIN %s B ON (A.bcrxid_attend = B.bcrxid_c and A.dcrxid_attend = B.dcrxid_c)
        
        ALTER TABLE OGT_P2 DROP COLUMN dcrxnm_attend
        SELECT A.*, B.dcrxnm as dcrxnm_attend
        INTO OGT_RC1_3
        FROM OGT_P2 A
        LEFT JOIN dedup_d2 B ON (A.dcrxid_attend = B.dcrxid_c)
        """ % (masterListDTN)
        dbContext.executeNoResults(query)

        # Recombine the two halves into the final rc<version>Final table,
        # selecting the shared column list from OGT_RC1_2's schema.
        ts = get_table_spec("OGT_RC1_2", dbContext)
        ts.populate_from_connection()
        cols = [c.field_name for c in ts]
        query = """
        SELECT *
        INTO {finalTable}
        FROM
        (SELECT {column_names} FROM OGT_RC1_2 
        UNION ALL
        SELECT {column_names} FROM OGT_RC1_3) as tmp;
        
        UPDATE {finalTable} SET dcxx_county=SUBSTRING(M.dcxx_county, 0, 30)
        FROM {finalTable} F
        LEFT JOIN {masterList} M ON (M.dcrxid_c = F.dcrxid_attend);
        """.format(finalTable=rcOutputDTN,
                   masterList=masterListDTN,
                   column_names=Joiner(cols))
        dbContext.executeNoResults(query)

        if version == 1:
            # Blank out preids not present in TideFile, then build OGT_Tide
            # with presentation values restored (gender letters, '*' cleanup).
            query = """
            Select LTRIM(RTRIM(ucrx_preid)) as ucrx_preid
            INTO blanklist  
            FROM {finalTable} 
            where ucrx_preid not in (SELECT ucrx_preid from Tidefile)
            and ucrx_preid<>'' and ucrx_preid is not null;
            
            UPDATE {finalTable} SET ucrx_preid='' WHERE ucrx_preid IN (SELECT ucrx_preid FROM blanklist);
            
            SELECT * 
            INTO OGT_Tide
            FROM {finalTable}
            
            UPDATE OGT_Tide SET ucrxgen='F' WHERE ucrxgen='1'
            UPDATE OGT_Tide SET ucrxgen='M' WHERE ucrxgen='2'
            UPDATE OGT_Tide SET studentmid='' WHERE studentmid='*'
            """.format(finalTable=rcOutputDTN)
            dbContext.executeNoResults(query)

            lengths = dbContext.execute("SELECT length FROM TideLayout")
            var_names = dbContext.execute(
                "SELECT variable_name FROM TideLayout WHERE variable_name NOT IN ('filler0', 'filler1', 'filler2', 'filler3', 'filler4', 'filler5', 'filler6', 'filler7', 'drops1', 'drops2', 'drops3', 'preid_num1', 'preid_num2', 'ufxx_accel_test', 'upcx_DNS', 'upcx_INV', 'upmx_DNS', 'upmx_INV', 'uprx_DNS', 'uprx_INV', 'uprx_TOG', 'upsx_DNS', 'upsx_INV', 'upwx_DNS', 'upwx_INV' )"
            )
            var_names_all = dbContext.execute(
                "SELECT variable_name FROM TideLayout")
            lengths = [l[0] for l in lengths]
            var_names = [v[0] for v in var_names]
            var_names_all = [v[0] for v in var_names_all]
            # Some RC versions seperate homeschool, some do not
            #rows = dbContext.execute( "SELECT {cols} FROM OGT_Tide ORDER BY CONVERT(binary(30), ssid), lithocode".format(cols=Joiner(var_names)) )
            rows = dbContext.execute(
                "SELECT {cols} FROM OGT_Tide WHERE schtype_attend <> 'H' ORDER BY CONVERT(binary(30), ssid), lithocode"
                .format(cols=Joiner(var_names)))

            # Fixed-width writer: pad each field to its TideLayout length.
            # grade/dob fields are right-aligned, zero-padded, with '.' for
            # NULL; selected-out columns are written as spaces.
            file_s = datetime.datetime.now()
            fh = open(tideDataOutputFile, "w")
            for row in rows:
                for i, v in enumerate(var_names_all):
                    s = "{:<" + str(lengths[i]) + "}"
                    if v in var_names:
                        for j, v2 in enumerate(var_names):
                            if (v == v2):
                                if v2 in [
                                        "grade", "dob_day", "dob_month",
                                        "dob_year"
                                ]:
                                    if row[j] is None:
                                        s = "{:>" + str(lengths[i]) + "}"
                                        fh.write(s.format("."))
                                    else:
                                        s = "{:0>" + str(lengths[i]) + "}"
                                        fh.write(
                                            s.format(int(
                                                row[j]))[0:lengths[i]])
                                else:
                                    fh.write(
                                        s.format(row[j] or "")[0:lengths[i]])
                                break
                    else:
                        fh.write(s.format(""))
                fh.write("\n")
            fh.close()
            print "time to write file: " + str(datetime.datetime.now() -
                                               file_s)

            # Optional regression check: output must match reference line by
            # line; raises on the first difference.
            if testOutput:
                f1 = open(tideDataOutputFile, 'r')
                f2 = open(tideDataTestCopy, 'r')
                lines1 = f1.readlines()
                lines2 = f2.readlines()
                for i, l1 in enumerate(lines1):
                    if l1 != lines2[i]:
                        print i
                        print l1
                        print lines2[i]
                        raise Exception
                print "Complete Match!"

        print "******** FINISHED ********" + str(datetime.datetime.now())
 def testJoinerMultipleSequences(self):
     """Joiner should chain multiple sequences (including an empty one) in
     their given order when rendered with a delimiter."""
     joiner = Joiner( [ 'column_1 VARCHAR(255)', 'column_2 VARCHAR(255)', 'column_3 VARCHAR(255)' ], [], [ 'PRIMARY KEY(column_1)' ] )
     rendered = "CREATE TABLE {table}({cols:delimiter=','})".format( cols=joiner, table="table_1" )
     expected = 'CREATE TABLE table_1(column_1 VARCHAR(255),column_2 VARCHAR(255),column_3 VARCHAR(255),PRIMARY KEY(column_1))'
     self.assertEqual( expected, rendered, 'Wrong output \'{}\''.format( rendered ) )
예제 #15
0
    def test_21(self):
        """Row-by-row regression test of the TTest macro against SAS specimen output.

        For every (target level, parent level, control row) combination in the
        aggregation sheet, reduces the input data to a minimal sample
        (n_parent == 2, n_target == 1), runs TTest on just that control row,
        and compares the result with the corresponding specimen spreadsheet.
        """
        answer_dir = os.path.join(self.answer_dir, 'test_21')
        if not os.path.exists(answer_dir):
            os.makedirs(answer_dir)
        specimen_dir = os.path.join(self.specimen_dir, 'ttest_test_21')
        result = True

        with self.read_g3() as g3, \
             self.read_oat_agg_sheet() as agg_sheet, \
             dbutilities.get_temp_table( self.db_context ) as tmp, \
             dbutilities.get_temp_table( self.db_context ) as tmp_agg :

            # As near as I can tell, the SAS test only runs for the tenth row of the agg sheet.
            # self.db_context.executeNoResults("DELETE FROM {agg_sheet} WHERE [import_order] != 10".format( agg_sheet=agg_sheet ))

            # We are just using this TTest instance to read the aggregation sheet. The actual ttest will use
            # another instance based on a truncated aggregation sheet.
            agg_sheet_reader = TTest(g3, self.db_context, agg_sheet, None,
                                     False)
            agg_sheet_reader.readAggData()

            assert dbutilities.table_exists(g3)

            # Flatten the target-level / parent-level / row hierarchy built by
            # readAggData into a single list of triples.
            targetParentRow = []
            for target_level in agg_sheet_reader.target_levels:
                for parent_level in target_level.contents:
                    for row in parent_level.contents:
                        targetParentRow.append(
                            (target_level, parent_level, row))

            # Process rows in the order they appeared in the control sheet.
            # NOTE(review): the parenthesized lambda parameter is Python 2-only syntax.
            targetParentRow.sort(key=lambda (row): row[2].import_order)

            for target_level, parent_level, row in targetParentRow:
                where_t = target_level.get_where_expression()
                target_id = target_level.id
                where_p = parent_level.get_where_expression()
                parent_id = parent_level.id
                i = row.import_order

                # Reduce the data to the desired sample
                dbutilities.drop_table_if_exists(tmp)
                query = """
                SELECT {vars},
                        COUNT( {input_var} ) OVER( PARTITION BY {parent_id}, {target_id} ) AS n_target,
                        0 AS n_parent
                INTO {tmp}
                FROM {g3}
                WHERE {where_t}
                """.format(parent_id=parent_id,
                           target_id=target_id,
                           input_var=row.inputvar,
                           where_t=where_t,
                           tmp=tmp,
                           g3=g3,
                           vars=Joiner(g3))
                self.db_context.executeNoResults(query)
                # Fill in n_parent via a windowed count over the parent partition.
                query = """
                UPDATE {tmp} SET n_parent = A.B FROM (
                    SELECT n_parent, COUNT( {input_var} ) OVER( PARTITION BY {parent_id} ) AS B
                    FROM {tmp}
                    WHERE {where_p}
                ) AS A
                """.format(parent_id=parent_id,
                           input_var=row.inputvar,
                           where_p=where_p,
                           tmp=tmp)
                print query
                self.db_context.executeNoResults(query)
                # Keep only the minimal sample: exactly 2 parent obs, 1 target obs.
                query = "DELETE FROM {tmp} WHERE ( n_parent != 2 ) OR ( n_target != 1 )".format(
                    tmp=tmp)
                self.db_context.executeNoResults(query)
                n_obs = dbutilities.n_obs(tmp)
                if n_obs > 0:

                    # Reduce the aggregation sheet to the current row
                    query = "SELECT * INTO {tmp_agg} FROM {agg_sheet} WHERE [import_order]={i}".format(
                        tmp_agg=tmp_agg, agg_sheet=agg_sheet, i=i)
                    self.db_context.executeNoResults(query)

                    # Do the ttest
                    ttester = TTest(tmp, self.db_context, tmp_agg, None, False)
                    ttester.readAggData()
                    ttester.execute()

                    # Check the answer
                    answer_file = os.path.join(
                        answer_dir, 'row_{0}_comparison.log'.format(i))
                    specimen_file = os.path.join(
                        specimen_dir, 'test_21_ttest_{0}.xls'.format(i))
                    result_i = self.compare_output(specimen_file, target_level,
                                                   answer_file)
                    result = result and result_i
                    print "{1} ttest test_21 for {0}".format(
                        i, 'PASSED' if result_i else 'FAILED')

                    self.assertTrue(result, "TTest Test 21 FAILED")
                    # NOTE(review): this returns after the FIRST control row that
                    # yields a non-empty sample, so later rows are never compared.
                    # Confirm whether a single comparison is intended.
                    return
예제 #16
0
            db_context=dbContext)
        valid_cols += get_column_names(
            "{studentTable}2".format(studentTable=studentTable),
            db_context=dbContext)
        valid_cols = [i.replace('[', '') for i in valid_cols]
        valid_cols = [i.replace(']', '').lower() for i in valid_cols]
        cols = dbContext.execute(
            "SELECT variable FROM ogt_dl_vars WHERE LOWER([value type])='variable' --variable IS NOT NULL AND variable<>'''''' AND variable<>'' AND  "
        )
        cols = [c[0].lower() for c in cols]
        valid_cols = [c for c in valid_cols if c in cols]
        valid_cols += [
            'ufxx_sample', 'A.lithocode'.format(studentTable=studentTable)
        ]
        query = "SELECT {cols} INTO OGT_DL0 FROM {studentTable}0 A JOIN {studentTable}1 B ON (A.lithocode = B.lithocode) JOIN {studentTable}2 C ON (A.lithocode = C.lithocode)".format(
            cols=Joiner(valid_cols), studentTable=studentTable)
        dbContext.executeNoResults(query)

        drop_tables([
            'OGT_DL1', 'OGT_DL2', 'OGT_DL3', 'OGT_DL4', 'OGT_DL_tmp0',
            'OGT_DL_tmp1'
        ], dbContext)
        query = """
        UPDATE OGT_DL0 SET ucsx_classname=REPLACE(ucsx_classname,'[','(');
        UPDATE OGT_DL0 SET ucrxlnm=REPLACE(ucrxlnm,'`','''');
        UPDATE OGT_DL0 SET ucrxfnm=REPLACE(ucrxfnm,'`','''');
        UPDATE OGT_DL0 SET ucxx_room_number=REPLACE(ucxx_room_number,'?','');
        UPDATE OGT_DL0 SET ucmx_coursecode=REPLACE(ucmx_coursecode,'`','');
        DELETE FROM OGT_DL0 WHERE bcrxid_attend='000001';
        UPDATE OGT_DL0 SET ssid='' WHERE ssid NOT LIKE '[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]_' 
        UPDATE OGT_DL0 SET dcrxid_home=' ' WHERE dcrxid_home='     .';
예제 #17
0
    def readAggData(self):
        """Read and validate the t-test aggregation ("control") sheet.

        Populates ``self.agg_data`` with control rows, normalizes the where/id
        columns on each row, and builds ``self.target_levels`` — a hierarchy of
        LevelData objects (target levels containing parent levels containing
        control rows), each target level carrying a TableSpec for its output.

        Raises ValueError if the input dataset is missing, if required control
        columns or input variables are absent, or if any control row fails
        validation (all problems are logged before raising).
        """
        
        # Validate the input file
        self.ds_in = get_table_spec( self.ds_in, self.db_context )
        self.ds_in.populate_from_connection()
        self.db_context = self.ds_in.db_context
        self.run_context = self.db_context.runContext
        self.run_context.debug( "processing control file" )
        if not table_exists( self.ds_in ):
            raise ValueError( "Input dataset {} does not exist".format( self.ds_in ) )
        
        # Read the control file
        # SAS 3-9
        if self.use_excel:
            reader = SafeExcelReader( self.run_context, self.agg_ds, self.agg_sheet )
            self.agg_data = [ row for row in reader.getRows() ]
        else:
            self.agg_ds = get_table_spec( self.agg_ds, self.db_context )
            self.agg_data = dump( self.agg_ds )
        
        # Validate the control file columns
        # SAS 10-28
        missing_vars = set()
        for var_name in [ 'outputvar', 'inputvar', 'targetlevel', 'targetid', 'wheret', 'wheret_value', 'parentlevel',
                          'parentid', 'wherep', 'wherep_value', 'critval' ]:
            if var_name not in self.agg_data[0]:
                missing_vars.add( var_name )
        if missing_vars:
            # Joiner presumably renders the set joined by the format-spec delimiter.
            raise ValueError( "TTest control sheet lacks required columns: {:', '}".format( Joiner( missing_vars ) ) )
        
        # Validate existence of requested columns
        # SAS 29-86
        # Normalize each control row in place: wheret/wherep become lists of
        # lowercased column names (split on '*'), the *_value fields become
        # lists of raw values (split on spaces), and the id/level names are
        # lowercased and stripped.
        for row in self.agg_data:
            if row.wheret is None:
                row.wheret = []
            else:
                row.wheret = [ x.strip().lower() for x in row.wheret.strip().split( '*' )]
                
            if row.wherep is None:
                row.wherep = []
            else:
                row.wherep = [ x.strip().lower() for x in row.wherep.strip().split( '*' )]
                
            if row.wheret_value is None:
                row.wheret_value = []
            else:
                row.wheret_value = [ x.strip() for x in row.wheret_value.strip().split( ' ' )]
                
            if row.wherep_value is None:
                row.wherep_value = []
            else:
                row.wherep_value = [ x.strip() for x in row.wherep_value.strip().split( ' ' )]
                
            row.inputvar = row.inputvar.lower().strip()
            row.targetid = row.targetid.lower().strip()
            row.parentid = row.parentid.lower().strip()
            row.targetlevel = row.targetlevel.lower().strip()
            row.parentlevel = row.parentlevel.lower().strip()

            # Every referenced variable must exist in the input dataset.
            # (missing_vars is empty here — the earlier check raised otherwise.)
            for var_name in ( row.wheret
                              + row.wherep
                              + [ row.inputvar, row.targetid, row.parentid ] ):
                if var_name != '' and var_name not in self.ds_in:
                    missing_vars.add( var_name )
        if missing_vars:
            raise ValueError( "TTest input data lacks required variables: {:', '}".format( Joiner( missing_vars ) ) )
        
        # Sort control data
        #SAS 87-90
        self.agg_data.sort( key=lambda row : ( row.targetlevel, row.parentlevel ) )
        
        # Check for consistency across "target" and "parent" variables.
        #SAS 91-222
        # Rows are grouped by (targetlevel, parentlevel); a new LevelData is
        # opened whenever the level name changes (relies on the sort above).
        last_targetlevel = _NONE_LEVEL
        last_parentlevel = _NONE_LEVEL
        self.target_levels = []
        messages = []
        
        for row in self.agg_data:
            wheret = tuple( row.wheret )
            wheret_value = tuple ( row.wheret_value )
            if len( wheret ) != len( wheret_value ):
                messages.append( 'Number of wheret_value items must match number of wheret items ("{0}" vs "{1}")'.format(
                        row.wheret, row.wheret_value ) )
            if row.targetlevel != last_targetlevel.level:
                last_targetlevel = LevelData( row.targetlevel, row.targetid, wheret, wheret_value )
                self.target_levels.append( last_targetlevel )
                last_parentlevel = _NONE_LEVEL
                
                # Create an output table in which to accumulate the results
                table_name = 'ttest_' + row.targetlevel
                last_targetlevel.output_table = TableSpec( self.db_context, table_name )
                last_targetlevel.output_table.add( self.ds_in[ row.targetid ].clone() )
            else:
                # Same level as the previous row: verify id/where are consistent.
                last_targetlevel.check( row.targetid, wheret, messages )
            
            wherep = tuple( row.wherep )
            wherep_value = tuple ( row.wherep_value )
            if len( wherep ) != len( wherep_value ):
                messages.append( 'Number of wherep_value items must match number of wherep items ("{0}" vs "{1}")'.format(
                        row.wherep, row.wherep_value ) )
            if row.parentlevel != last_parentlevel.level:
                last_parentlevel = LevelData( row.parentlevel, row.parentid, wherep, wherep_value )
                last_targetlevel.contents.append( last_parentlevel )
            else:
                last_parentlevel.check( row.parentid, wherep, messages )
                
            last_parentlevel.contents.append( row )
            # One TINYINT output column per control row.
            last_targetlevel.output_table.add( FieldSpec( row.outputvar, 'TINYINT' ) )
            
            try:
                row.critval = float( row.critval )
                if not MIN_CRITVAL <= row.critval <= MAX_CRITVAL:
                    messages.append( "Bad critical value {} is not between {} and {}".format(
                            row.critval, MIN_CRITVAL, MAX_CRITVAL ) )
            except ValueError:
                # NOTE(review): float(None) raises TypeError, not ValueError, so a
                # missing critval would propagate uncaught — confirm critval is
                # always populated in the control sheet.
                messages.append( "Critical value {} is not a float".format( row.critval ) )
                
            try:
                row.outputvar = db_identifier_quote( row.outputvar )
            except ValueError:
                messages.append( "Output variable name {} is not a valid database identifier".format( row.outputvar ) )
                
            try:
                row.targetlevel = db_identifier_quote( row.targetlevel )
            except ValueError:
                messages.append( "Target level name {} is not a valid database identifier".format( row.targetlevel ) )
                
        for message in messages:
            self.run_context.error( message )
        
        if messages:
            raise ValueError( "Invalid inputs to ttest macro. See log for details" )
 def testJoiner(self):
     """A Joiner over a single list should render its items joined by the
     delimiter named in the format spec."""
     column_names = [ 'column_1', 'column_2', 'column_3' ]
     joined = Joiner( column_names )
     rendered = "SELECT {cols:delimiter=','} FROM {table}".format( table="table_1", cols=joined )
     expected = 'SELECT column_1,column_2,column_3 FROM table_1'
     self.assertEqual( expected, rendered, 'Wrong output \'{}\''.format( rendered ) )
예제 #19
0
    def odeFilesMain(formatTable, stateTable, districtTable, runContextName):
        """Produce ODE fixed-width output files: one whole-state file plus one
        file per district, written by a pool of worker processes.

        The layout (variable names, start positions, lengths, types, output
        formats, ODE column names) is read from *formatTable*, ordered by the
        'start' column so all the parallel lists below line up.

        NOTE(review): relies on names presumably defined at module level —
        dbContext, outputDir, NUM_PROCESSES, Process, Queue, Lock, Joiner,
        outputStateFile, outputDistrictFiles — confirm availability.
        """
        #output a text and cvs file for each dcrxid  (i.e. 000123.cvs)
        #select  data where dcrxid = this district's dcrxid
        #select entire dataset (order by dcrxid, grade, ucrxlnm, ucrxfnm
        var_names = dbContext.execute(
            "SELECT variable FROM {formatTable} WHERE [value type]='variable' order by start"
            .format(formatTable=formatTable))
        var_names = [v[0] for v in var_names]

        #Check if variables listed in excel file exist in data - raise exception if they do not
        # (the results are discarded; only the query's success matters)
        rows0 = dbContext.execute(
            "SELECT TOP 1 {cols} FROM {districtTable}".format(
                cols=Joiner(var_names), districtTable=districtTable))
        rows0 = dbContext.execute(
            "SELECT TOP 1 {cols} FROM {stateTable}".format(
                cols=Joiner(var_names), stateTable=stateTable))

        # Pull the layout description; every query is ordered by 'start' so the
        # lists extracted below stay index-aligned with each other.
        districts = dbContext.execute(
            "SELECT DISTINCT dcrxid FROM {districtTable} order by dcrxid".
            format(districtTable=districtTable))
        lengths = dbContext.execute(
            "SELECT length FROM {formatTable} order by start".format(
                formatTable=formatTable))
        fields = dbContext.execute(
            "SELECT variable FROM {formatTable} WHERE variable <> 'dcrxid' order by start"
            .format(formatTable=formatTable))
        types = dbContext.execute(
            "SELECT [value type] FROM {formatTable} order by start".format(
                formatTable=formatTable))
        const_names = dbContext.execute(
            "SELECT variable FROM {formatTable} WHERE [value type]<>'variable' order by start"
            .format(formatTable=formatTable))
        starts = dbContext.execute(
            "SELECT start FROM {formatTable} order by start".format(
                formatTable=formatTable))
        outformats = dbContext.execute(
            "SELECT [out format] FROM {formatTable} order by start".format(
                formatTable=formatTable))
        names = dbContext.execute(
            "SELECT [ODE col name] FROM {formatTable} WHERE variable <> 'dcrxid' order by start"
            .format(formatTable=formatTable))

        # Unwrap single-column result rows into flat lists.
        districts = [d[0] for d in districts]
        lengths = [l[0] for l in lengths]
        fields = [t[0] for t in fields]
        types = [t[0] for t in types]
        const_names = [c[0] for c in const_names]
        starts = [s[0] for s in starts]
        outformats = [o[0] for o in outformats]
        names = [n[0] for n in names]

        #outputDistrictFiles()
        #outputStateFile()

        # Each worker writes its own numbered slice of the state file.
        #ps_state = Process(target=outputStateFile, args=(outputDir, runContextName, stateTable, fields, starts, names, lengths, types, var_names, outformats))
        ps = [
            Process(target=outputStateFile,
                    args=(i, outputDir, runContextName, stateTable, fields,
                          starts, names, lengths, types, var_names,
                          outformats))
            for i, x in enumerate(range(NUM_PROCESSES))
        ]
        #ps_state.start()
        [p.start() for p in ps]
        [p.join() for p in ps]

        # Concatenate the per-process slices into the final state file,
        # in process-index order to preserve row ordering.
        fh = open(outputDir + "WholeState.txt", "w")
        for i in range(NUM_PROCESSES):
            f = open(outputDir + "WholeState%s.txt" % i, "r")
            fh.write(f.read())
            f.close()
        fh.close()

        print "STATE FILE COMPLETE " + str(datetime.datetime.now())

        # District files: workers pull district ids from a shared queue.
        q = Queue()
        lock = Lock()
        [q.put(x) for x in districts]
        ps = [
            Process(target=outputDistrictFiles,
                    args=(q, i, lock, outputDir, runContextName, districtTable,
                          fields, starts, names, lengths, types, var_names,
                          outformats))
            for i, x in enumerate(range(NUM_PROCESSES))
        ]
        [p.start() for p in ps]
        [p.join() for p in ps]
예제 #20
0
    def execute(self):
        """Run the full merge pipeline and populate this (output) table.

        Stages: validate inputs; drop stale outputs; reject rows with
        duplicate/missing required keys; merge on required keys; bring in
        optional and fuzzy keys; reject optional/fuzzy mismatches; meld the
        surviving columns into the output table; restore rejects for outer
        joins; and fill the optional remainder/report tables.

        NOTE(review): 'self' here doubles as the output table spec (it is
        passed to drop_table_if_exists and sliced/iterated like a field
        collection) — presumably this class extends TableSpec; confirm.
        """

        # Validate inputs
        self.validate()

        # Delete output tables if they exist
        drop_table_if_exists(self)
        if self.fuzzy_report_table:
            drop_table_if_exists(self.fuzzy_report_table)
        if self.left_remain_table:
            drop_table_if_exists(self.left_remain_table)
        if self.right_remain_table:
            drop_table_if_exists(self.right_remain_table)

        # Scan for illegal duplication of required keys in both tables
        left_dup_table, left_reject_table = self._process_required_key_dups_and_missing(
            self.db_context, self.left_input_table, 'left',
            self.allow_dups_left)
        right_dup_table, right_reject_table = self._process_required_key_dups_and_missing(
            self.db_context, self.right_input_table, 'right',
            self.allow_dups_right)

        # We will create an initial table that contains only the required keys,
        # optional keys, fuzzy keys, foreign keys and the duplicate detection columns
        merge_table, left_fields, right_fields = self._create_merge_table()

        # If necessary, remove duplicates that appear in both tables
        if self.allow_dups_left and self.allow_dups_right and not self.allow_dups_both:
            with get_temp_table(self.db_context) as full_dup_table:

                # Collect keys duplicated on BOTH sides, then flag and reject
                # the matching rows on each side.
                query = _DUPS_BOTH_QUERY.format(
                    left_dup_table=left_dup_table,
                    right_dup_table=right_dup_table,
                    full_dup_table=full_dup_table,
                    key=Joiner(self.required_merge_keys))
                self.db_context.executeNoResults(query)
                query = _MARK_DUPS_BOTH_QUERY.format(
                    dup_table=left_dup_table,
                    full_dup_table=full_dup_table,
                    key=Joiner(self.required_merge_keys))
                self.db_context.executeNoResults(query)
                self._move_rejects(left_dup_table, left_reject_table, 0,
                                   'has_dups_both<>0',
                                   "'Duplicate required key on both sides'")
                query = _MARK_DUPS_BOTH_QUERY.format(
                    dup_table=right_dup_table,
                    full_dup_table=full_dup_table,
                    key=Joiner(self.required_merge_keys))
                self.db_context.executeNoResults(query)
                self._move_rejects(right_dup_table, right_reject_table, 0,
                                   'has_dups_both<>0',
                                   "'Duplicate required key on both sides'")

        # Perform the first merge (required key)
        # foreign_keys[0] refers back to the left input, [1] to the right.
        query = _FIRST_MERGE_QUERY.format(
            merge_table=merge_table,
            required_key=Joiner(self.required_merge_keys),
            foreign_key_l=Joiner(merge_table.foreign_keys[0]),
            foreign_key_r=Joiner(merge_table.foreign_keys[1]),
            left_dup_table=left_dup_table,
            right_dup_table=right_dup_table)
        self.db_context.executeNoResults(query)
        left_dup_table.drop()
        right_dup_table.drop()

        #Remove rejects after the first merge
        self._move_rejects(merge_table, left_reject_table, 0,
                           'has_dups_r IS NULL', "'No required key match'")
        self._move_rejects(merge_table, right_reject_table, 1,
                           'has_dups_l IS NULL', "'No required key match'")

        # Bring in optional and fuzzy keys
        if len(left_fields) > 0:
            query = _ADD_MORE_KEYS_QUERY.format(
                fields=Joiner(left_fields),
                merge_table=merge_table,
                from_table=self.left_input_table,
                side='left',
                required_key=Joiner(merge_table.foreign_keys[0]))
            self.db_context.executeNoResults(query)
        if len(right_fields) > 0:
            query = _ADD_MORE_KEYS_QUERY.format(
                fields=Joiner(right_fields),
                merge_table=merge_table,
                from_table=self.right_input_table,
                side='right',
                required_key=Joiner(merge_table.foreign_keys[1]))
            self.db_context.executeNoResults(query)

        key_fields = Joiner(self.required_merge_keys,
                            merge_table.foreign_keys[0],
                            merge_table.foreign_keys[1])

        # Flag matches for rejection based on optional keys
        if len(self.optional_merge_keys) > 0:
            indices = Joiner(range(1, len(self.optional_merge_keys) + 1))
            reason_expression = _OPTIONAL_MISMATCH_MESSAGE.format(
                indices=indices)
            query = _OPTIONAL_MISMATCH_QUERY.format(
                merge_table=merge_table,
                indices=indices,
                reason_expression=reason_expression)
            self.db_context.executeNoResults(query)

        # Flag matches for rejection based on fuzzy keys
        if len(self.fuzzy_merge_keys) > 0:
            indices = Joiner(range(1, len(self.fuzzy_merge_keys) + 1))
            query = _FUZZY_MATCH_QUERY.format(merge_table=merge_table,
                                              indices=indices,
                                              schema=self.db_context.schema)
            self.db_context.executeNoResults(query)

            # Create fuzzy report table
            if self.fuzzy_report_table is not None:
                query = _FUZZY_MATCH_REPORT_QUERY.format(
                    key_fields=key_fields,
                    indices=indices,
                    report_table=self.fuzzy_report_table,
                    merge_table=merge_table)
                self.db_context.executeNoResults(query)

            # Drop fuzzy mismatches
            reason_expression = _FUZZY_MISMATCH_MESSAGE.format(indices=indices)
            query = _FUZZY_MISMATCH_QUERY.format(
                merge_table=merge_table,
                thresholds=Joiner(self.similarity_thresholds),
                schema=self.db_context.schema,
                reason_expression=reason_expression)
            self.db_context.executeNoResults(query)

        # Move keys rejected due to optional or fuzzy matches
        # (copied to BOTH reject tables, then deleted from the merge table)
        reason_expression = "{fld:qualified}".format(
            fld=merge_table['merge_report'])
        self._copy_rejects(merge_table, left_reject_table, 0, '(reject<>0)',
                           reason_expression)
        self._copy_rejects(merge_table, right_reject_table, 1, '(reject<>0)',
                           reason_expression)
        self._delete_rows(merge_table, '(reject<>0)')

        # Meld columns in main merge table, including data columns that did not participate in the merge
        self.left_input_table.alias = "L"
        self.right_input_table.alias = "R"
        merge_table.alias = "A"
        key_field_names = [x.field_name for x in key_fields]
        original_fields = self[:]
        meld_fields = [x for x in self if x.field_name not in key_field_names]
        for key_field in key_fields:
            if key_field not in self:
                self.add(key_field.clone())

        # Create the output table itself, then populate it.
        self.db_context.executeNoResults(self.definition)

        query = _MELD_FIELDS_QUERY.format(
            merge_table=merge_table,
            out_table=self,
            meld_fields=Joiner(meld_fields),
            key_fields=key_fields,
            left_table=self.left_input_table,
            left_key=Joiner(merge_table.foreign_keys[0]),
            right_table=self.right_input_table,
            right_key=Joiner(merge_table.foreign_keys[1]))

        self.db_context.executeNoResults(query)

        # Add non-matched records for outer joins
        if self.join_type in (JOIN_TYPE_LEFT, JOIN_TYPE_FULL):
            query = _RESTORE_REJECTS_QUERY.format(
                out_table=self,
                reject_table=left_reject_table,
                input_table=self.left_input_table,
                fields=Joiner(original_fields),
                foreign_key=Joiner(left_reject_table.foreign_keys[0]),
                side='left')
            self.db_context.executeNoResults(query)

        if self.join_type in (JOIN_TYPE_RIGHT, JOIN_TYPE_FULL):
            # NOTE(review): both reject tables use foreign_keys[0] here (unlike
            # merge_table's [0]/[1] split) — reject tables presumably carry a
            # single foreign key each; confirm.
            query = _RESTORE_REJECTS_QUERY.format(
                out_table=self,
                reject_table=right_reject_table,
                input_table=self.right_input_table,
                fields=Joiner(original_fields),
                foreign_key=Joiner(right_reject_table.foreign_keys[0]),
                side='right')
            self.db_context.executeNoResults(query)

        # Bring into the remainder tables the data columns that did not participate in the merge
        # (left rows can only remain unmatched for RIGHT/INNER joins, and vice versa)
        if self.left_remain_table is not None and self.join_type in (
                JOIN_TYPE_RIGHT, JOIN_TYPE_INNER):
            # Create the table
            del self.left_remain_table[:]
            self.left_remain_table.add_all(left_reject_table.foreign_keys[0])
            self.left_remain_table.add_all(meld_fields)
            self.left_remain_table.add(left_reject_table.merge_report.clone())
            self.db_context.executeNoResults(self.left_remain_table.definition)

            left_reject_table.alias = "A"
            query = _REMAIN_TABLE_QUERY.format(
                key_fields=Joiner(left_reject_table.foreign_keys[0]),
                data_fields=Joiner(meld_fields),
                remain_table=self.left_remain_table,
                reject_table=left_reject_table,
                input_table=self.left_input_table,
                side='left')

            self.db_context.executeNoResults(query)

        if self.right_remain_table is not None and self.join_type in (
                JOIN_TYPE_LEFT, JOIN_TYPE_INNER):
            del self.right_remain_table[:]
            self.right_remain_table.add_all(right_reject_table.foreign_keys[0])
            self.right_remain_table.add_all(meld_fields)
            self.right_remain_table.add(
                right_reject_table.merge_report.clone())
            self.db_context.executeNoResults(
                self.right_remain_table.definition)

            right_reject_table.alias = "A"
            query = _REMAIN_TABLE_QUERY.format(
                key_fields=Joiner(right_reject_table.foreign_keys[0]),
                data_fields=Joiner(meld_fields),
                remain_table=self.right_remain_table,
                reject_table=right_reject_table,
                input_table=self.right_input_table,
                side='right')
            self.db_context.executeNoResults(query)

        # Clean up intermediate tables.
        left_reject_table.drop()
        right_reject_table.drop()
        merge_table.drop()