class TestCheckPrimers(unittest.TestCase):
    """Asserts the value each CheckPrimers.check_* method returns for the
    primers read from 'TestCP_COL4A5.xlsx'.
    """

    def setUp(self):
        """Read the primers from the test workbook and wrap them in CheckPrimers."""
        self.ets = ExcelToSQL('TestCP_COL4A5.xlsx', 'Test.db')
        # get_primers() returns a pair; only the first element is used here.
        primers_frame, _faults = self.ets.get_primers()
        self.cp = CheckPrimers(primers_frame)

    def test_check_gene(self):
        """check_gene() returns 8 for the test workbook."""
        self.assertEqual(self.cp.check_gene(), 8)

    def test_check_exon(self):
        """check_exon() returns 4 for the test workbook."""
        self.assertEqual(self.cp.check_exon(), 4)

    def test_check_direction(self):
        """check_direction() returns 2 for the test workbook."""
        self.assertEqual(self.cp.check_direction(), 2)

    def test_check_version(self):
        """check_version() returns 1 for the test workbook."""
        self.assertEqual(self.cp.check_version(), 1)

    def test_check_seq(self):
        """check_seq() returns 3 for the test workbook."""
        self.assertEqual(self.cp.check_seq(), 3)

    def test_check_tag(self):
        """check_tag() returns 2 for the test workbook."""
        self.assertEqual(self.cp.check_tag(), 2)

    def test_check_batch(self):
        """check_batch() returns 3 for the test workbook."""
        self.assertEqual(self.cp.check_batch(), 3)

    def test_check_dates(self):
        """check_dates() returns 1 for the test workbook."""
        self.assertEqual(self.cp.check_dates(), 1)

    def test_check_frag_size(self):
        """check_frag_size() returns 2 for the test workbook."""
        self.assertEqual(self.cp.check_frag_size(), 2)

    def test_check_anneal_temp(self):
        """check_anneal_temp() returns 1 for the test workbook."""
        self.assertEqual(self.cp.check_anneal_temp(), 1)

    def test_check_all(self):
        """check_all() returns a collection of length 10 for the test workbook."""
        combined = self.cp.check_all()
        self.assertEqual(len(combined), 10)
 def setUp(self):
     """Read the primers from the test workbook and wrap them in CheckPrimers."""
     self.ets = ExcelToSQL('TestCP_COL4A5.xlsx', 'Test.db')
     # get_primers() returns a pair; only the first element is used here.
     primers_frame, _faults = self.ets.get_primers()
     self.cp = CheckPrimers(primers_frame)
Esempio n. 3
0
    def to_db(self, df_combined, gene_name):
        """Creates tables and adds data into the database.

           Function splits the given data frame into a primer frame and a SNP frame, runs the CheckPrimers data
           checks on them and, if no errors are reported, appends them to the Primers and SNPs tables and records
           the gene in the Genes table. If data for the gene is already in the database, the previous data is first
           saved to an excel document (archived_files) and then deleted, so the new data overrides it.

           Uses the `curs` cursor and `con` connection defined outside this method, and commits on `con` before
           returning (also when the checks reported errors and nothing was inserted).

                :param df_combined: data frame to be inserted into database.
                :param gene_name: gene to check against database.
                :return info: description of action performed (for audit log); the error details from the checks
                              when they reported errors.
                :return archived_filename: filename (without extension) the previous data is saved under (for audit
                                           log); None when nothing was archived.
                :raises: any error from writing or moving the archive workbook propagates, so existing rows are
                         never deleted unless their archive is in place.
        """
        # Local import: only the archive step below needs it.
        import shutil

        # (1) Creates database schema
        curs.execute("CREATE TABLE IF NOT EXISTS Primers(PrimerId INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, "
                     "Gene TEXT, Exon TEXT, Direction TEXT, Version INTEGER, Primer_Seq TEXT, Chrom TEXT, M13_Tag TEXT"
                     ", Batch TEXT, Project TEXT, Order_date TEXT, Frag_size INTEGER, Anneal_Temp TEXT, Other TEXT, "
                     "snp_check INTEGER, no_snps INTEGER, rs TEXT, hgvs TEXT, freq TEXT, ss TEXT, ss_proj TEXT, "
                     "other2 TEXT, action_to_take TEXT, check_by TEXT, start TEXT, end TEXT, name TEXT)")

        curs.execute("CREATE TABLE IF NOT EXISTS SNPs(SNP_Id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, Gene TEXT, "
                     "Exon TEXT, Direction TEXT, snp_check INTEGER, rs TEXT, hgvs TEXT, freq TEXT, ss TEXT, "
                     "ss_proj TEXT, other2 TEXT, action_to_take TEXT, check_by TEXT, name TEXT)")

        curs.execute("CREATE TABLE IF NOT EXISTS Genes(Gene_Id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, Gene TEXT)")

        # (2) Drops unnecessary columns to make two tables and removes duplicates.
        primertable_cols_to_drop = ['snp_check', 'rs', 'hgvs', 'freq', 'ss', 'ss_proj', 'other2', 'action_to_take',
                                    'check_by']
        snptable_cols_to_drop = ['Exon', 'Direction', 'Version', 'Primer_seq', 'Chrom', 'M13_tag', 'Batch', 'project',
                                 'Order_date', 'Frag_size', 'anneal_temp', 'Other', 'no_snps', 'start', 'end']

        df_primertable = df_combined.drop(primertable_cols_to_drop, axis=1)
        df_primertable = df_primertable.drop_duplicates(subset=['Gene', 'Exon', 'Direction', 'Chrom'])
        df_snptable = df_combined.drop(snptable_cols_to_drop, axis=1)

        # (3) Performs data checks using CheckPrimers and CheckSNPs classes.
        check = CheckPrimers(df_primertable, df_snptable)
        total_errors, error_details = check.check_all()

        # (4) Checks if gene data already in database.
        # NOTE(review): membership is decided by comparing the text form of check_in_db()'s result with the
        # Python 2 repr of a one-element unicode tuple; kept as-is because check_in_db is not visible here.
        uni_gene = '(u\'%s\',)' % gene_name
        gene = self.check_in_db(gene_name)   # this outputs a unicode string

        # (5) Adds to database if no errors. Overrides data if already present.
        archived_filename = None
        if total_errors == 0:
            if str(uni_gene) == str(gene):
                # Add query to data frame then save to excel. The gene name is bound as a parameter rather than
                # formatted into the SQL text.
                get_old_query = ("SELECT p.Gene, p.Exon, p.Direction, p.Version, p.Primer_seq, p.Chrom, p.M13_Tag, "
                                 "p.Batch, p.Project, p.Order_date, p.Frag_size, p.Anneal_Temp, p.Other, s.snp_check, "
                                 "p.no_snps, s.rs, s.hgvs, s.freq, s.ss, s.ss_proj, s.other2, s.action_to_take, "
                                 "s.check_by FROM SNPs s LEFT JOIN Primers p ON s.name = p.name WHERE p.Gene=?")
                today_date = datetime.datetime.now().strftime("%d-%m-%Y_%H%M")
                df_sql = pd.read_sql_query(get_old_query, con=con, params=(gene_name,))
                archived_filename = '%s_%s' % (gene_name, today_date)
                workbook = '%s.xlsx' % archived_filename

                # The context manager saves and closes the workbook even if to_excel raises.
                with ExcelWriter(workbook) as writer:
                    df_sql.to_excel(writer, sheet_name=today_date, index=False)

                # Move the workbook just written into the archive directory without going through a shell.
                # Naming the full destination file keeps mv's overwrite-on-collision behaviour.
                archive_dir = "/home/cuser/PycharmProjects/django_apps/mysite/primerdb/archived_files/"
                shutil.move(workbook, os.path.join(archive_dir, workbook))

                # Old rows are removed only after the archive has been written and moved.
                curs.execute("DELETE FROM Primers WHERE Gene=?", (gene_name,))
                curs.execute("DELETE FROM Genes WHERE Gene=?", (gene_name,))
                curs.execute("DELETE FROM SNPs WHERE Gene=?", (gene_name,))

                info = "Data updated."

            else:
                info = "New gene added."

            # Insert new data into SQL tables.
            curs.execute("INSERT INTO Genes (Gene) VALUES (?)", (gene_name,))
            df_primertable.to_sql('Primers', con, if_exists='append', index=False)
            df_snptable.to_sql('SNPs', con, if_exists='append', index=False)

            print("Primers successfully added to database.")
        else:
            info = error_details

        con.commit()
        return info, archived_filename