Ejemplo n.º 1
0
    def sql_parse_dna_seq(datafile):
        '''Build SQL upsert statements for CHROM8 DNA sequences from a text file.

        The file is consumed as alternating lines: the 1st, 3rd, 5th ... line
        of the file supplies the ACCESSION value and the line that follows it
        supplies the DNA_SEQUENCE value.  Every pair produces one
        "INSERT ... ON DUPLICATE KEY UPDATE" statement followed by a newline.

        input: path of the text file to read
        output: a single string holding all generated statements

        Each piece of text is passed through cl.clean_lines() before being
        added to the result (presumably to drop the line break carried by
        every line read from the file -- confirm against cl).

        NOTE(review): the values are spliced into the SQL text without any
        escaping, so this must only be fed trusted input.
        NOTE(review): if the file has an odd number of lines, the returned
        text ends with an INSERT fragment that has no sequence value.
        '''
        pieces = []
        # True while the next line is the accession half of a pair.
        expecting_accession = True

        with open(datafile, 'r') as f:
            # Stream the file instead of loading every line up front.
            for line in f:
                if expecting_accession:
                    pieces.append(
                        "INSERT into CHROM8(ACCESSION, DNA_SEQUENCE) values ("
                        + cl.clean_lines("'" + line + "',")
                    )
                else:
                    pieces.append(
                        cl.clean_lines(
                            "'" + line
                            + "')ON DUPLICATE KEY UPDATE DNA_SEQUENCE = '"
                            + line + "';"
                        )
                        + "\n"
                    )
                expecting_accession = not expecting_accession

        # Join once at the end rather than growing a string inside the loop.
        return "".join(pieces)
Ejemplo n.º 2
0
    def sql_parse_prot_trans(datafile):
        '''Build SQL upsert statements for CHROM8 protein translations from a text file.

        The file is consumed as alternating lines: the 1st, 3rd, 5th ... line
        of the file supplies the ACCESSION value and the line that follows it
        supplies the TRANSLATION value.  Every pair produces one
        "INSERT ... ON DUPLICATE KEY UPDATE" statement followed by a newline.

        input: path of a text file with accession number and protein
               translation on alternating lines
        output: a single string holding all generated statements

        Each piece of text is passed through cl.clean_lines() before being
        added to the result (presumably to drop the line break carried by
        every line read from the file -- confirm against cl).

        NOTE(review): the values are spliced into the SQL text without any
        escaping, so this must only be fed trusted input.
        NOTE(review): if the file has an odd number of lines, the returned
        text ends with an INSERT fragment that has no translation value.
        '''
        pieces = []
        # True while the next line is the accession half of a pair.
        expecting_accession = True

        with open(datafile, 'r') as f:
            # Stream the file instead of loading every line up front.
            for line in f:
                if expecting_accession:
                    pieces.append(
                        "INSERT into CHROM8(ACCESSION, TRANSLATION) values ("
                        + cl.clean_lines("'" + line + "',")
                    )
                else:
                    pieces.append(
                        cl.clean_lines(
                            "'" + line
                            + "')ON DUPLICATE KEY UPDATE TRANSLATION = '"
                            + line + "';"
                        )
                        + "\n"
                    )
                expecting_accession = not expecting_accession

        # Join once at the end rather than growing a string inside the loop.
        return "".join(pieces)