예제 #1
0
def read_self_mate_map_data_and_trim(cursor,conn,sample,species,DB_NAME,tablename,table_raw):
    """Load the mapped reads of three alignment files of ``sample`` into ``tablename``.

    The files registered as 'self_refseq' and 'mate_refseq' contribute one row
    per mapped read with the constants 'T', 1000 and '101M'; the 'mate_genome'
    file contributes the read name plus SAM columns 3, 4 and 6 (the last one
    cut to 20 characters).  For species 'mm10' the reference name is prefixed
    with 'chr'.

    cursor    -- cursor used to look up the sample's file paths via d00.
    DB_NAME   -- database the private write connection is opened against.
    tablename -- target table (interpolated into the SQL, must be trusted).
    conn, table_raw -- accepted for interface compatibility, not used.

    The function opens its own connection for the inserts and now always
    closes it, and waits for every samtools child process to finish.
    """
    def mapped_fields(filename):
        # Stream the whitespace-split records printed by samtools
        # (-F 4 filters out unmapped reads).
        cmd = 'samtools view -F 4 '+d00.get_sample_file(cursor,sample,filename)
        proc = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
        try:
            for line in proc.stdout:
                yield line.split()
        finally:
            # Close the pipe and reap the child so no zombie/descriptor leaks.
            proc.stdout.close()
            proc.wait()

    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    conn1 = mb.connect(host="localhost",user="******",passwd="123456",db=DB_NAME)
    try:
        cursor1 = conn1.cursor()
        for filename,tag in (('self_refseq','S_R'),('mate_refseq','M_R')):
            values = [(t[0],tag,'T','101M') for t in mapped_fields(filename)]
            cursor1.executemany("insert into "+tablename+" values(%s,%s,%s,1000,%s) ",values)
            conn1.commit()

        values = []
        for t in mapped_fields('mate_genome'):
            chrom = 'chr'+t[2] if species == 'mm10' else t[2]
            # Slicing caps the sixth SAM column at 20 characters.
            values.append((t[0],'M_G',chrom,t[3],t[5][:20]))
        cursor1.executemany("insert into "+tablename+" values(%s,%s,%s,%s,%s) ",values)
        conn1.commit()
        cursor1.close()
    finally:
        conn1.close()
예제 #2
0
def FPKM_DB(cursor, conn, genes_table, samples, intype, insert, tablename):
    """Copy ``genes_table`` into ``tablename`` and add one FPKM column per sample.

    The table is created as a copy of ``genes_table`` (with an index on
    ``gene``) unless that fails, in which case "exists" is printed.  For each
    sample a float column named ``<exp><insert>`` is added and filled from the
    sample's ``intype`` file: the first line is skipped, column 1 is the gene
    and column 10 the value.  Only genes present in ``genes_table`` with a
    value greater than zero are written; other rows keep the column default.

    Table and column names are interpolated into SQL and must be trusted.
    """
    sql = 'create table %s select * from %s' % (tablename, genes_table)
    genes = d00.table_2_dict(cursor, genes_table, ['gene', 'gene'])
    try:
        cursor.execute(sql)
        conn.commit()
        cursor.execute("create index reci on " + tablename + "(gene);")
        conn.commit()
    except Exception:
        print("exists")
    for sample in samples:
        exp = d00.get_sample_info(cursor, sample, 'exp') + insert
        note = ""
        try:
            cursor.execute("alter table " + tablename + " add " + exp +
                           " float DEFAULT '0'")
            conn.commit()
        except Exception:
            note = "EXISTS "
        path = d00.get_sample_file(cursor, sample, intype)
        print(note + path)
        fpkm = []
        with open(path) as f:
            f.readline()  # skip the header line
            for line in f:
                t = line.split()
                if len(t) < 10 or t[0] not in genes:
                    continue
                try:
                    value = float(t[9])
                except ValueError:
                    continue
                # Compare numerically: the raw field is a string, and
                # string > int is always true on Python 2.
                if value > 0:
                    fpkm.append([t[9], t[0]])
        if fpkm:
            cursor.executemany(
                "update " + tablename + " set " + exp + "=%s where gene = %s",
                fpkm)
        conn.commit()
예제 #3
0
def FPKM_DB(cursor,conn,genes_table,samples,intype,insert,tablename):
    """Build ``tablename`` from ``genes_table`` and store per-sample FPKM values.

    ``tablename`` is created as a copy of ``genes_table`` with an index on
    ``gene``; if that fails "exists" is printed and processing continues.
    Every sample gets a float column ``<exp><insert>`` which is filled from
    the sample's ``intype`` file (header line skipped, gene in column 1,
    value in column 10).  Rows are only written for genes known to
    ``genes_table`` whose value is numerically greater than zero.

    Table and column names are interpolated into SQL and must be trusted.
    """
    create_sql = 'create table %s select * from %s' %(tablename,genes_table)
    genes = d00.table_2_dict(cursor,genes_table,['gene','gene'])
    try:
        cursor.execute(create_sql)
        conn.commit()
        cursor.execute("create index reci on "+tablename+"(gene);")
        conn.commit()
    except Exception:
        print("exists")
    for sample in samples:
        exp = d00.get_sample_info(cursor,sample,'exp')+insert
        column_note = ""
        try:
            cursor.execute("alter table "+tablename+" add "+exp+" float DEFAULT '0'")
            conn.commit()
        except Exception:
            column_note = "EXISTS "
        path = d00.get_sample_file(cursor,sample,intype)
        print(column_note+path)
        fpkm = []
        with open(path) as handle:
            handle.readline()  # header
            for line in handle:
                fields = line.split()
                if len(fields) < 10 or fields[0] not in genes:
                    continue
                try:
                    positive = float(fields[9]) > 0
                except ValueError:
                    continue
                # The original compared the string field with 0, which never
                # filtered anything on Python 2.
                if positive:
                    fpkm.append([fields[9],fields[0]])
        if fpkm:
            cursor.executemany("update "+tablename+" set "+exp+"=%s where gene = %s",fpkm)
        conn.commit()
예제 #4
0
def BOWTIE_alignment(cursor,conn,samples,species,ref,ins,outdir,rec):
    """Register anchor-genome BAM outputs and return the Bowtie commands still to run.

    For every sample the expected BAM path and the shell command producing it
    are written to the ``files`` table (committed per sample).  A command is
    returned only when its BAM file does not exist on disk yet.

    ins -- pair of file types (read 1, read 2) resolved via d00.get_sample_file.
    ref -- key into refall[species]['bowtie2'].
    """
    template = 'bash /data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/scripts/BOWTIE.pair.sh %s %s %s %s'
    record_sql = "replace into files (sample,type,path,method)values(%s,%s,%s,%s)"
    pending = []
    for sample in samples:
        prefix = outdir+'/BAM.anchor_genome.'+sample+'_'+rec
        bam_path = prefix+'.bam'
        reads1 = d00.get_sample_file(cursor,sample,ins[0])
        reads2 = d00.get_sample_file(cursor,sample,ins[1])
        index = refall[species]['bowtie2'][ref]
        command = template %(reads1,reads2,prefix,index)
        cursor.execute(record_sql,[sample,rec,bam_path,command])
        conn.commit()
        if os.path.exists(bam_path):
            continue
        pending.append(command)
    return pending
예제 #5
0
def BOWTIE_alignment(cursor,conn,samples,species,in1,in2,rec,para,outdir):
    """Register anchor BAM outputs and return the cRNA Bowtie commands still to run.

    Each sample's BAM path and command are stored in the ``files`` table and
    committed immediately.  The command receives both read files, a SAM
    output path, the BAM prefix, the genome index from
    Ref[species]['bowtie2']['genome'] and the extra parameters ``para``.
    Only commands whose BAM file is missing on disk are returned.
    """
    template = 'bash /data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/cRNA/BOWTIE.pair.sh %s %s %s %s %s %s'
    record_sql = "replace into files (sample,type,path,method)values(%s,%s,%s,%s)"
    pending = []
    for sample in samples:
        sam_path = outdir+'/SAM.anchor.'+sample+'_'+rec+'.sam'
        bam_prefix = outdir+'/BAM.anchor_genome.'+sample+'_'+rec
        bam_path = bam_prefix+'.bam'
        reads1 = d00.get_sample_file(cursor,sample,in1)
        reads2 = d00.get_sample_file(cursor,sample,in2)
        genome_index = Ref[species]['bowtie2']['genome']
        command = template %(reads1,reads2,sam_path,bam_prefix,genome_index,para)
        cursor.execute(record_sql,[sample,rec,bam_path,command])
        conn.commit()
        if os.path.exists(bam_path):
            continue
        pending.append(command)
    return pending
예제 #6
0
def BWA_PAIRED(cursor,conn,specise,ref,samples,intype,folder,rec):
    """Register paired-end BWA outputs and return the commands to run.

    For each sample the BAM path ``<folder>/BAM<rec>_<sample>.bam`` is stored
    in the ``files`` table together with the method text produced by
    add_files(); one commit is issued after all samples.

    intype -- pair of file types (read 1, read 2) resolved via d00.
    ref    -- key into refall[specise]['bwa'].

    Returns the plain shell commands.  The add_files() result is kept only as
    the recorded method, matching the other *_PAIRED / *_SINGLE builders in
    this module (previously it replaced the command that was returned).
    """
    script = "/data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/scripts/BWA.pair.sh"
    cmds = []
    for sample in samples:
        insert = rec+'_'+sample
        fq1 = d00.get_sample_file(cursor,sample,intype[0])
        fq2 = d00.get_sample_file(cursor,sample,intype[1])
        prefix = folder+'/BAM'+insert
        path = prefix+'.bam'
        cmd = 'bash '+script+' '+folder+' '+fq1+' '+fq2 \
                +' '+refall[specise]['bwa'][ref]+" "+prefix+" "+insert
        method = add_files(cmd,script)
        cursor.execute("replace into files (sample,type,path,method)values(%s,%s,%s,%s) ",[sample,rec,path,method])
        cmds.append(cmd)
    conn.commit()
    return cmds
예제 #7
0
def BOWTIE_PAIRED(cursor,conn,samples,species,ref,ins,outdir,usage,rec,server="TANG"):
    """Register paired-end Bowtie BAM outputs and return the commands still to run.

    Reference path and script are resolved per sample through d00.get_ref and
    d00.get_script for the given ``server``.  The ``files`` row stores the
    command followed by the script's own commands as the method, and is
    committed per sample.  Commands are returned only for BAM files that do
    not exist on disk.
    """
    record_sql = "replace into files (sample,type,path,method,server)values(%s,%s,%s,%s,%s)"
    pending = []
    for sample in samples:
        bam_prefix = outdir+'/BAM.'+rec+'_'+sample
        bam_path = bam_prefix+'.bam'
        reads1 = d00.get_sample_file(cursor,sample,ins[0])
        reads2 = d00.get_sample_file(cursor,sample,ins[1])
        refpath = d00.get_ref(cursor,species,'bowtie2',ref,server)
        scriptpath,scriptcmds = d00.get_script(cursor,'bowtie_pair',server)
        command = 'bash %s %s %s %s %s %s' %(scriptpath,reads1,reads2,bam_prefix,refpath,usage)
        recorded = command+" \n"+scriptcmds
        cursor.execute(record_sql,[sample,rec,bam_path,recorded,server])
        conn.commit()
        if os.path.exists(bam_path):
            continue
        pending.append(command)
    return pending
예제 #8
0
def TOPHAT_PAIRED(cursor,conn,specise,ref,samples,intype,report,folder,rec,server="TANG"):
    """Register paired-end TopHat outputs and return one command per sample.

    Each sample gets its own output directory ``<folder>/<rec>_<sample>``.
    The BAM path and the add_files() method text are stored in the ``files``
    table; a single commit follows the loop.  The returned list holds the
    plain shell commands for all samples.
    """
    record_sql = "replace into files (sample,type,path,method)values(%s,%s,%s,%s)"
    commands = []
    for sample in samples:
        tag = rec+'_'+sample
        sample_dir = folder+'/'+tag
        reads1 = d00.get_sample_file(cursor,sample,intype[0])
        reads2 = d00.get_sample_file(cursor,sample,intype[1])
        bam_prefix = sample_dir+'/BAM'+tag
        bam_path = bam_prefix+'.bam'
        refpath = d00.get_ref(cursor,specise,'bowtie2',ref,server)
        command = "bash %sscripts/TOPHAT.pair.sh %s %s %s %s %s %s %s" %(
            dirname,sample_dir,reads1,reads2,refpath,bam_prefix,tag,report)
        recorded = add_files(command,dirname+"scripts/TOPHAT.pair.sh")
        cursor.execute(record_sql,[sample,rec,bam_path,recorded])
        commands.append(command)
    conn.commit()
    return commands
예제 #9
0
def BWA_PAIRED(cursor,conn,specise,ref,samples,intype,folder,rec,server="TANG"):
    """Register paired-end BWA outputs for ``server`` and return one command per sample.

    Script and reference paths come from d00.get_script / d00.get_ref.  The
    ``files`` row stores the command followed by the script's commands as the
    method; all rows are committed once after the loop.
    """
    record_sql = "replace into files (sample,type,path,method,server) values(%s,%s,%s,%s,%s) "
    commands = []
    for sample in samples:
        tag = rec+'_'+sample
        reads1 = d00.get_sample_file(cursor,sample,intype[0])
        reads2 = d00.get_sample_file(cursor,sample,intype[1])
        bam_prefix = folder+'/BAM'+tag
        bam_path = bam_prefix+'.bam'
        scriptpath,scriptcmds = d00.get_script(cursor,'bwa_pair',server)
        refpath = d00.get_ref(cursor,specise,'bwa',ref,server)
        command = "bash %s %s %s %s %s %s %s" %(scriptpath,folder,reads1,reads2,refpath,bam_prefix,tag)
        recorded = command+" \n"+scriptcmds
        cursor.execute(record_sql,[sample,rec,bam_path,recorded,server])
        commands.append(command)
    conn.commit()
    return commands
예제 #10
0
def BOWTIE_alignment(cursor, conn, samples, species, ref, ins, outdir, rec):
    """Record anchor-genome BAM paths and collect Bowtie commands for missing BAMs.

    One ``files`` row (path plus command) is written and committed per
    sample.  The returned list contains the commands of those samples whose
    BAM file is not present on disk.

    ins -- (read-1 type, read-2 type) resolved via d00.get_sample_file.
    ref -- key into refall[species]['bowtie2'].
    """
    script_call = ('bash /data/Analysis/fanxiaoying/project/'
                   'project01_polyA-RNAseq/modules/scripts/BOWTIE.pair.sh '
                   '%s %s %s %s')
    todo = []
    for sample in samples:
        stem = outdir + '/BAM.anchor_genome.' + sample + '_' + rec
        target = stem + '.bam'
        mate1 = d00.get_sample_file(cursor, sample, ins[0])
        mate2 = d00.get_sample_file(cursor, sample, ins[1])
        shell_cmd = script_call % (mate1, mate2, stem,
                                   refall[species]['bowtie2'][ref])
        cursor.execute(
            "replace into files (sample,type,path,method)values(%s,%s,%s,%s)",
            [sample, rec, target, shell_cmd])
        conn.commit()
        already_done = os.path.exists(target)
        if not already_done:
            todo.append(shell_cmd)
    return todo
예제 #11
0
def circularRNA_tophat_pair(cursor,conn,samples,species,ref,insert,out,in1,in2):
    """Prepare per-sample output directories and cRNA TopHat commands.

    Creates ``<out>/<sample>`` when missing, builds the command for the
    cRNA_TOPHAT.pair.sh script and records path and command in the ``files``
    table under type 'BAM_PE_CRNA_SORT' (insert ignore, committed per
    sample).  Commands are returned only for BAM files not yet on disk.
    """
    # The trailing blank is part of the recorded command text.
    launcher = 'bash /data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/scripts/cRNA_TOPHAT.pair.sh '
    record_sql = "insert ignore into files values(%s,%s,%s,NULL,%s)"
    pending = []
    for sample in samples:
        sample_dir = out+'/'+sample
        if not os.path.exists(sample_dir):
            os.mkdir(sample_dir)
        bam_prefix = sample_dir+'/BAM_PE_'+sample+'_'+insert
        bam_path = bam_prefix+'.bam'
        reads1 = d00.get_sample_file(cursor,sample,in1)
        reads2 = d00.get_sample_file(cursor,sample,in2)
        index = refall[species]['bowtie2'][ref]
        command = "%s %s %s %s %s %s %s" %(launcher,sample_dir,reads1,reads2,bam_prefix,insert,index)
        if not os.path.exists(bam_path):
            pending.append(command)
        cursor.execute(record_sql,[sample,'BAM_PE_CRNA_SORT',bam_path,command])
        conn.commit()
    return pending
예제 #12
0
def GET_BINNED_COUNTS_ALONG_GENOME_Multiple(cursor,conn,samples,bamfile,binsize,chrom_size,tablename):
    """Count reads per genomic bin for several samples and store them in ``tablename``.

    The table is created through in05.CREATE_TABLE with the columns
    ``chr``, ``bin_id`` and one int column per sample.  Chromosome names and
    lengths are read from the ``chrom_size`` table.  For every chromosome,
    bins start at position 1 and advance by ``binsize``; each row holds the
    chromosome, ``pos // binsize`` and, per sample, the result of
    ``count(chromosome, pos, min(pos + binsize, length))`` on the sample's
    BAM handle.

    Rows are inserted chromosome by chromosome (so the whole genome is never
    held in memory), committed once at the end, and all BAM handles are
    closed even when an error occurs.

    Raises ValueError when ``binsize`` is not positive.
    """
    if binsize <= 0:
        raise ValueError("binsize must be a positive integer")

    colnames = ['chr','bin_id']+list(samples)
    types = ['varchar(20)','int']+['int']*len(samples)
    print("%s %s %s" % (tablename,colnames,types))
    in05.CREATE_TABLE(cursor,tablename,colnames,types)
    cursor.execute("select chr,`length` from "+chrom_size)
    chrom_lengths = cursor.fetchall()

    up_cmd = "insert ignore into "+tablename+" values("+",".join([" %s"]*len(colnames))+")"
    print(up_cmd)

    samfiles = []
    try:
        for sample in samples:
            bamname = d00.get_sample_file(cursor,sample,bamfile)
            samfiles.append(pysam.Samfile(bamname, "rb"))

        for row in chrom_lengths:
            chrom = row[0]
            length = int(row[1])
            counts = []
            for pos in range(1,length,binsize):
                end = min(pos+binsize,length)
                bin_info = [chrom,pos//binsize]
                for samfile in samfiles:
                    bin_info.append(samfile.count(chrom,pos,end))
                counts.append(bin_info)
            if counts:
                cursor.executemany(up_cmd,counts)
        conn.commit()
    finally:
        for samfile in samfiles:
            samfile.close()
예제 #13
0
def summarize_pair_map_conditions_db(cursor,samples,dbname,tablename,in1,dist,anchor_length):
    """Return one summarize_reads_db.py command line per sample.

    The BAM file of each sample is looked up via d00.get_sample_file using
    the file type ``in1``; nothing is written to the database.
    """
    template = 'python /data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/cRNA/summarize_reads_db.py %s %s %s %s %s %s'
    return [
        template %(dbname,tablename,d00.get_sample_file(cursor,sample,in1),dist,sample,anchor_length)
        for sample in samples
    ]
예제 #14
0
def RPKM_DB(cursor,conn,g_t,gene_length,totalreads,samples,intype,insert,tablename):
    """Compute per-gene RPKM values for each sample and store them in ``tablename``.

    ``tablename`` is created as a copy of ``gene_length`` with an index on
    ``gene`` (prints "exists" when that fails).  For each sample a float
    column ``<exp><insert>`` is added, the sample's ``intype`` file is read
    as ``<transcript> <count>`` lines, counts are summed per gene using the
    transcript-to-gene mapping from ``g_t``, and the value
    ``count * 1e9 / (total * gene_length)`` is written for every gene that
    has a length entry.

    totalreads -- name of the sample attribute holding the total read count.

    Table and column names are interpolated into SQL and must be trusted.
    """
    sql = 'create table %s select * from %s' %(tablename,gene_length)
    try:
        cursor.execute(sql)
        conn.commit()
        cursor.execute("create index reci on "+tablename+"(gene);")
        conn.commit()
    except Exception:
        print("exists")
    gt = d00.table_2_dict(cursor,g_t,['transc','gene'])
    gl = d00.table_2_dict(cursor,gene_length,['gene','length'])
    for sample in samples:
        total = int(d00.get_sample_info(cursor,sample,totalreads))
        exp = d00.get_sample_info(cursor,sample,'exp')+insert
        try:
            cursor.execute("alter table "+tablename+" add "+exp+" float DEFAULT '0'")
        except Exception:
            print("EXISTS_colume")
        count = {}
        with open(d00.get_sample_file(cursor,sample,intype)) as f:
            for line in f:
                t = line.split()
                if len(t) < 2 or t[0] not in gt:
                    continue
                gene = gt[t[0]]
                if gene in gl:
                    count[gene] = count.get(gene,0)+int(t[1])
        values = [
            [float(count[gene]*1000000*1000)/(total*int(gl[gene])),gene]
            for gene in count
        ]
        # Show a sample row only when one exists; indexing blindly crashed
        # for inputs with fewer than two genes.
        if len(values) > 1:
            print("%d %s" % (len(values),values[1]))
        else:
            print("%d" % len(values))
        if values:
            cursor.executemany("update "+tablename+" set "+exp+"=%s where gene = %s",values)
        conn.commit()
예제 #15
0
def FILES_GROUPER(cursor,conn,samples,newsName,intype,sep):
    """Store the ``intype`` file paths of several samples as one grouped record.

    Every sample's path is followed by ``sep`` and the pieces are
    concatenated.  The concatenation minus its last character is printed,
    while the value stored in the ``files`` table (under sample ``newsName``
    and type ``intype``) is the full concatenation including the trailing
    separator.
    """
    pieces = [d00.get_sample_file(cursor,sample,intype)+sep for sample in samples]
    grouped = "".join(pieces)
    print(grouped[:-1])
    note = "FILES_GROUPER # "+intype
    cursor.execute(
        "replace into files (sample,type,path,method)values(%s,%s,%s,%s)",
        [newsName,intype,grouped,note])
    conn.commit()
예제 #16
0
def circularRNA_tophat_pair(cursor,conn,samples,species,ref,out,in1,in2,rec,server="TANG"):
    """Prepare output directories and cRNA TopHat commands for ``server``.

    Creates ``<out>/<rec>_<sample>`` when missing, resolves the bowtie2
    reference through d00.get_ref and records BAM path and command in the
    ``files`` table under type ``rec`` (insert ignore, committed per sample).
    Commands are returned only for BAM files that are not on disk yet.
    """
    # The trailing blank is part of the recorded command text.
    launcher = 'bash /data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/scripts/cRNA_TOPHAT.pair.sh '
    record_sql = "insert ignore into files (sample,type,path,method)values(%s,%s,%s,%s)"
    pending = []
    for sample in samples:
        tag = rec+"_"+sample
        sample_dir = out+'/'+tag
        if not os.path.exists(sample_dir):
            os.mkdir(sample_dir)
        bam_prefix = sample_dir+'/'+tag
        bam_path = bam_prefix+'.bam'
        reads1 = d00.get_sample_file(cursor,sample,in1)
        reads2 = d00.get_sample_file(cursor,sample,in2)
        refpath = d00.get_ref(cursor,species,'bowtie2',ref,server)
        command = "%s %s %s %s %s %s %s" %(launcher,sample_dir,reads1,reads2,bam_prefix,rec,refpath)
        if not os.path.exists(bam_path):
            pending.append(command)
        cursor.execute(record_sql,[sample,rec,bam_path,command])
        conn.commit()
    return pending
예제 #17
0
def BOWTIE_alignment(cursor, conn, samples, species, in1, in2, rec, para,
                     outdir):
    """Record anchor BAM paths and collect cRNA Bowtie commands for missing BAMs.

    Per sample, the command is built from both read files, the SAM output
    path, the BAM prefix, Ref[species]['bowtie2']['genome'] and ``para``;
    path and command are stored in the ``files`` table and committed.  The
    returned list holds the commands whose BAM file is absent on disk.
    """
    script_call = ('bash /data/Analysis/fanxiaoying/project/'
                   'project01_polyA-RNAseq/modules/cRNA/BOWTIE.pair.sh '
                   '%s %s %s %s %s %s')
    todo = []
    for sample in samples:
        sam_out = outdir + '/SAM.anchor.' + sample + '_' + rec + '.sam'
        stem = outdir + '/BAM.anchor_genome.' + sample + '_' + rec
        target = stem + '.bam'
        mate1 = d00.get_sample_file(cursor, sample, in1)
        mate2 = d00.get_sample_file(cursor, sample, in2)
        shell_cmd = script_call % (mate1, mate2, sam_out, stem,
                                   Ref[species]['bowtie2']['genome'], para)
        cursor.execute(
            "replace into files (sample,type,path,method)values(%s,%s,%s,%s)",
            [sample, rec, target, shell_cmd])
        conn.commit()
        already_done = os.path.exists(target)
        if not already_done:
            todo.append(shell_cmd)
    return todo
예제 #18
0
def summarize_pair_map_conditions_db(cursor, samples, dbname, tablename, in1,
                                     dist, anchor_length):
    """Build the summarize_reads_db.py invocation for every sample.

    Each command receives the database name, table name, the sample's file
    of type ``in1`` (via d00.get_sample_file), ``dist``, the sample name and
    ``anchor_length``.  The commands are returned, not executed.
    """
    script_call = ('python /data/Analysis/fanxiaoying/project/'
                   'project01_polyA-RNAseq/modules/cRNA/summarize_reads_db.py '
                   '%s %s %s %s %s %s')

    def command_for(sample):
        bam = d00.get_sample_file(cursor, sample, in1)
        return script_call % (dbname, tablename, bam, dist, sample,
                              anchor_length)

    return [command_for(sample) for sample in samples]
예제 #19
0
def BAM_FLAGSTAT(cursor,conn,samples,intype,folder,rec):
    """Register flagstat report paths and return the ``samtools flagstat`` commands.

    The report of each sample goes to ``<folder>/flagstat.<sample>_<intype>.txt``;
    path and command are stored in the ``files`` table under type ``rec``
    and committed once after the loop.
    """
    record_sql = "replace into files (sample,type,path,method)values(%s,%s,%s,%s)"
    commands = []
    for sample in samples:
        report = folder+'/flagstat.'+sample+"_"+intype+'.txt'
        source = d00.get_sample_file(cursor,sample,intype)
        command = 'samtools flagstat '+source+' > '+report
        cursor.execute(record_sql,[sample,rec,report,command])
        commands.append(command)
    conn.commit()
    return commands
예제 #20
0
def circularRNA_tophat_pair(cursor, conn, samples, species, ref, insert, out,
                            in1, in2):
    """Set up per-sample directories and cRNA TopHat commands.

    ``<out>/<sample>`` is created when absent.  The command and BAM path are
    stored in the ``files`` table with type 'BAM_PE_CRNA_SORT' (insert
    ignore, one commit per sample).  The result lists the commands of the
    samples whose BAM file is not on disk.
    """
    # Keep the trailing blank: it ends up in the recorded command text.
    runner = 'bash /data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/scripts/cRNA_TOPHAT.pair.sh '
    todo = []
    for sample in samples:
        workdir = out + '/' + sample
        if not os.path.exists(workdir):
            os.mkdir(workdir)
        stem = workdir + '/BAM_PE_' + sample + '_' + insert
        target = stem + '.bam'
        mate1 = d00.get_sample_file(cursor, sample, in1)
        mate2 = d00.get_sample_file(cursor, sample, in2)
        shell_cmd = "%s %s %s %s %s %s %s" % (
            runner, workdir, mate1, mate2, stem, insert,
            refall[species]['bowtie2'][ref])
        missing = not os.path.exists(target)
        if missing:
            todo.append(shell_cmd)
        cursor.execute("insert ignore into files values(%s,%s,%s,NULL,%s)",
                       [sample, 'BAM_PE_CRNA_SORT', target, shell_cmd])
        conn.commit()
    return todo
예제 #21
0
def BWA_PAIRED(cursor, conn, specise, ref, samples, intype, folder, rec):
    """Register paired-end BWA outputs and return the commands to run.

    The BAM path ``<folder>/BAM<rec>_<sample>.bam`` of every sample is stored
    in the ``files`` table together with the method text produced by
    add_files(); a single commit follows the loop.

    intype -- pair of file types (read 1, read 2) resolved via d00.
    ref    -- key into refall[specise]['bwa'].

    Returns the plain shell commands.  The add_files() output is used only
    as the recorded method, in line with the other command builders of this
    module (it used to overwrite the command that was returned).
    """
    script = "/data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/scripts/BWA.pair.sh"
    cmds = []
    for sample in samples:
        insert = rec + '_' + sample
        fq1 = d00.get_sample_file(cursor, sample, intype[0])
        fq2 = d00.get_sample_file(cursor, sample, intype[1])
        prefix = folder + '/BAM' + insert
        path = prefix + '.bam'
        cmd = ('bash ' + script + ' ' + folder + ' ' + fq1 + ' ' + fq2 +
               ' ' + refall[specise]['bwa'][ref] + " " + prefix + " " + insert)
        method = add_files(cmd, script)
        cursor.execute(
            "replace into files (sample,type,path,method)values(%s,%s,%s,%s) ",
            [sample, rec, path, method])
        cmds.append(cmd)
    conn.commit()
    return cmds
예제 #22
0
def SUMMARIZE(cursor,conn,samples,intype,folder,para,rec):
    """Register summary report paths and return the SUMMARIZE.sh commands.

    Each sample's report is ``<folder>/summarize.<rec>_<sample>.txt``.  Path
    and command are stored in the ``files`` table under type ``rec``; one
    commit is issued after all samples.
    """
    launcher = 'bash /data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/scripts/SUMMARIZE.sh '
    record_sql = "replace into files (sample,type,path,method)values(%s,%s,%s,%s)"
    commands = []
    for sample in samples:
        tag = rec+'_'+sample
        source = d00.get_sample_file(cursor,sample,intype)
        report = folder+'/summarize.'+tag+'.txt'
        command = launcher+source+' '+report+' '+para
        cursor.execute(record_sql,[sample,rec,report,command])
        commands.append(command)
    conn.commit()
    return commands
예제 #23
0
def MAPPED_SINGLE(cursor,conn,samples,bamtype,folder,rec):
    """Register mapped-read FASTA outputs and return the extraction commands.

    The output of each sample is ``<folder>/<rec>_<sample>.fa.gz``.  The
    ``files`` table receives the path and the add_files() method text; the
    returned list holds the plain commands.  One commit follows the loop.
    """
    script = "/data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/assembly/mapped_reads_single.py"
    record_sql = "replace into files (sample,type,path,method)values(%s,%s,%s,%s)"
    commands = []
    for sample in samples:
        source = d00.get_sample_file(cursor,sample,bamtype)
        target = folder+'/'+rec+'_'+sample+'.fa.gz'
        command = 'python '+script+' '+source+' '+target+' '+sample
        recorded = add_files(command,script)
        cursor.execute(record_sql,[sample,rec,target,recorded])
        commands.append(command)
    conn.commit()
    return commands
예제 #24
0
def BAM_FLAGSTAT(cursor, conn, samples, intype, folder, rec):
    """Build ``samtools flagstat`` commands and record their report files.

    For every sample the report path
    ``<folder>/flagstat.<sample>_<intype>.txt`` and the command writing it
    are stored in the ``files`` table as type ``rec``.  The transaction is
    committed once at the end and all commands are returned.
    """
    jobs = []
    for sample in samples:
        out_txt = folder + '/flagstat.' + sample + "_" + intype + '.txt'
        in_bam = d00.get_sample_file(cursor, sample, intype)
        shell_cmd = 'samtools flagstat ' + in_bam + ' > ' + out_txt
        row = [sample, rec, out_txt, shell_cmd]
        cursor.execute(
            "replace into files (sample,type,path,method)values(%s,%s,%s,%s)",
            row)
        jobs.append(shell_cmd)
    conn.commit()
    return jobs
예제 #25
0
def SUMMARIZE(cursor,conn,samples,intype,folder,para,rec,server="TANG"):
    """Register summary report paths for ``server`` and return the commands.

    The script path comes from d00.get_script(cursor,'summarize',server);
    the script's own command text returned alongside it is not used.  Each
    report path ``<folder>/summarize.<rec>_<sample>.txt`` is stored with its
    command and the server name; one commit follows the loop.
    """
    record_sql = "replace into files (sample,type,path,method,server)values(%s,%s,%s,%s,%s)"
    commands = []
    for sample in samples:
        tag = rec+'_'+sample
        source = d00.get_sample_file(cursor,sample,intype)
        report = folder+'/summarize.'+tag+'.txt'
        scriptpath,_unused_cmds = d00.get_script(cursor,'summarize',server)
        command = 'bash %s %s %s %s' %(scriptpath,source,report,para)
        cursor.execute(record_sql,[sample,rec,report,command,server])
        commands.append(command)
    conn.commit()
    return commands
예제 #26
0
def Read_BWA_flagstat(cursor,conn,samples,intype,recs):
    """Copy two counts from each sample's flagstat file into the ``samples`` table.

    The first whitespace-separated token of line 1 is stored in column
    ``recs[0]`` and the first token of line 3 in column ``recs[1]``; both
    columns are ensured to exist as INT via d02.check_table_colume.  All
    updates are committed together at the end.

    The counts and the sample name are passed as bound parameters, the file
    is always closed, and a ValueError is raised when the file has fewer
    than three usable lines or a token is not an integer.
    """
    d02.check_table_colume(cursor,conn,'samples',recs[0],'INT')
    d02.check_table_colume(cursor,conn,'samples',recs[1],'INT')
    # Column names cannot be bound, so only they are formatted into the SQL.
    sql = "update samples set %s=%%s,%s=%%s where sample=%%s" %(recs[0],recs[1])
    for sample in samples:
        path = d00.get_sample_file(cursor,sample,intype)
        with open(path) as f1:
            first = f1.readline().split()
            f1.readline()  # the second line is not used
            third = f1.readline().split()
        if not first or not third:
            raise ValueError("flagstat file %s has fewer than three usable lines" % path)
        params = (int(first[0]),int(third[0]),sample)
        print("%s %s" % (sql,params))
        cursor.execute(sql,params)
    conn.commit()
예제 #27
0
def read_VCF_file_single(cursor, conn, DB_NAME, tablename, samples, type):
    """Load depth and alternate-allele fraction from per-sample VCF files.

    Creates ``tablename`` keyed by (chr, pos, Alt) with a ``<sample>_DP`` and
    a ``<sample>_alt`` column per sample (prints "EXISTS" when creation
    fails).  For every sample the VCF registered under file type ``type`` is
    read; for each record carrying both ``DP`` and ``DP4`` in the INFO field
    the row (chr, pos, Ref, Alt, DP, (DP4[2] + DP4[3]) / DP) is upserted.

    Skipped records: header lines starting with '#', lines with fewer than
    eight fields, records whose Ref or Alt is longer than 30 characters,
    records with a malformed DP4 and records with DP equal to zero.

    Closes ``cursor`` and ``conn`` when finished; ``DB_NAME`` is unused.
    """
    limit = 30  # width of the Ref/Alt varchar columns
    sample_infos = ''.join(
        " %s_DP varchar(5) DEFAULT '0',%s_alt float DEFAULT '0'," % (sample, sample)
        for sample in samples)
    sql = """CREATE TABLE %s (
	  `chr` varchar(20) NOT NULL DEFAULT '',
	  `pos` int(11) NOT NULL DEFAULT '0',
	  `Ref` varchar(30) DEFAULT NULL,
	  `Alt` varchar(30) NOT NULL DEFAULT '',
	  %s
	  PRIMARY KEY (`chr`,`pos`,`Alt`)
	) ENGINE=InnoDB DEFAULT CHARSET=latin1""" % (tablename, sample_infos)
    print(sql)
    try:
        cursor.execute(sql)
    except Exception:
        print("EXISTS")
    for sample in samples:
        path = d00.get_sample_file(cursor, sample, type)
        values = []
        with open(path) as vcf:
            for line in vcf:
                # Only real header/comment lines are skipped; a '#' inside a
                # data record no longer drops the record.
                if line.startswith('#'):
                    continue
                t = line.split()
                if len(t) < 8:
                    continue
                info = {}
                for item in t[7].split(';'):
                    parts = item.split('=')
                    if len(parts) > 1:
                        info[parts[0]] = parts[1]
                if 'DP4' not in info or 'DP' not in info:
                    continue
                # Over-long alleles were always skipped (the truncation in
                # the old code was dead because of the following continue).
                if len(t[3]) > limit or len(t[4]) > limit:
                    continue
                DP4 = info['DP4'].split(',')
                depth = int(info['DP'])
                if len(DP4) < 4 or depth == 0:
                    continue
                alt_fraction = float(int(DP4[2]) + int(DP4[3])) / depth
                values.append((t[0], t[1], t[3], t[4], info['DP'],
                               alt_fraction))
        cmd = "insert into %s (chr,pos,Ref,Alt,%s,%s)values(%%s,%%s,%%s,%%s,%%s,%%s) on duplicate key update %s=values(%s),%s=values(%s)" % (
            tablename, sample + '_DP', sample + '_alt', sample + '_DP',
            sample + '_DP', sample + '_alt', sample + '_alt')
        if values:
            print("%s %s" % (cmd, values[0]))
            cursor.executemany(cmd, values)
        else:
            print(cmd)
        conn.commit()
    cursor.close()
    conn.close()
예제 #28
0
def Read_BWA_flagstat(cursor, conn, samples, intype, recs):
    """Store two flagstat counts per sample in the ``samples`` table.

    Columns ``recs[0]`` and ``recs[1]`` are ensured to exist as INT through
    d02.check_table_colume.  For each sample the first token of line 1 of
    its ``intype`` file goes to ``recs[0]`` and the first token of line 3 to
    ``recs[1]``.  A single commit is issued after all samples.

    Values and sample name are bound as query parameters, the input file is
    closed deterministically, and a ValueError is raised for files with
    fewer than three usable lines or non-integer tokens.
    """
    d02.check_table_colume(cursor, conn, 'samples', recs[0], 'INT')
    d02.check_table_colume(cursor, conn, 'samples', recs[1], 'INT')
    # Identifiers cannot be bound as parameters; values can.
    sql = "update samples set %s=%%s,%s=%%s where sample=%%s" % (recs[0],
                                                                 recs[1])
    for sample in samples:
        path = d00.get_sample_file(cursor, sample, intype)
        with open(path) as f1:
            line1 = f1.readline().split()
            f1.readline()  # line 2 is ignored
            line3 = f1.readline().split()
        if not line1 or not line3:
            raise ValueError(
                "flagstat file %s has fewer than three usable lines" % path)
        params = (int(line1[0]), int(line3[0]), sample)
        print("%s %s" % (sql, params))
        cursor.execute(sql, params)
    conn.commit()
예제 #29
0
def SUMMARIZE(cursor, conn, samples, intype, folder, para, rec):
    """Build SUMMARIZE.sh commands and record their report files.

    The report of a sample is written to
    ``<folder>/summarize.<rec>_<sample>.txt``; path and command are stored
    in the ``files`` table as type ``rec``.  The commit happens once after
    the loop and all commands are returned.
    """
    runner = 'bash /data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/scripts/SUMMARIZE.sh '
    jobs = []
    for sample in samples:
        label = rec + '_' + sample
        in_bam = d00.get_sample_file(cursor, sample, intype)
        out_txt = folder + '/summarize.' + label + '.txt'
        shell_cmd = runner + in_bam + ' ' + out_txt + ' ' + para
        row = [sample, rec, out_txt, shell_cmd]
        cursor.execute(
            "replace into files (sample,type,path,method)values(%s,%s,%s,%s)",
            row)
        jobs.append(shell_cmd)
    conn.commit()
    return jobs
예제 #30
0
def BWA_SINGLE(cursor,conn,specise,ref,samples,intype,folder,rec,server="TANG"):
    """Register single-end BWA outputs and return one command per sample.

    The BAM path ``<folder>/BAM<rec>_<sample>.bam`` and the add_files()
    method text are stored in the ``files`` table (insert ignore); a single
    commit follows the loop.

    server -- server name handed to d00.get_ref.  It was previously read
              from an undefined local name; it is now an optional parameter
              with the same default the sibling builders use.

    The command now invokes BWA.single.sh, the script that is also recorded
    as the method, instead of the paired-end script.
    """
    script = dirname+"scripts/BWA.single.sh"
    cmds = []
    for sample in samples:
        insert = rec+'_'+sample
        fq = d00.get_sample_file(cursor,sample,intype)
        prefix = folder+'/BAM'+insert
        path = prefix+'.bam'
        refpath = d00.get_ref(cursor,specise,'bwa',ref,server)
        cmd = "bash %s %s %s %s %s %s" %(script,folder,fq,refpath,prefix,insert)
        method = add_files(cmd,script)
        cursor.execute("insert ignore into files (sample,type,path,method)values(%s,%s,%s,%s) ",[sample,rec,path,method])
        cmds.append(cmd)
    conn.commit()
    return cmds
예제 #31
0
def CUFFLINKS(cursor,conn,species,ref,samples,intype,folder,rec):
    """Create per-sample output directories and return the cufflinks commands.

    Each sample gets ``<folder>/<rec>_<sample>`` (created when missing); the
    expected ``genes.fpkm_tracking`` file inside it is recorded in the
    ``files`` table together with the command.  The annotation comes from
    refall[species]['gtf'][ref].  One commit follows the loop.
    """
    record_sql = "replace into files (sample,type,path,method)values(%s,%s,%s,%s)"
    commands = []
    for sample in samples:
        tag = rec+'_'+sample
        sample_dir = folder+'/'+tag
        if not os.path.exists(sample_dir):
            os.mkdir(sample_dir)
        source = d00.get_sample_file(cursor,sample,intype)
        tracking = sample_dir+'/genes.fpkm_tracking'
        annotation = refall[species]['gtf'][ref]
        command = 'cufflinks -o %s -p 6 -G %s %s' %(sample_dir,annotation,source)
        cursor.execute(record_sql,[sample,rec,tracking,command])
        commands.append(command)
    conn.commit()
    return commands
예제 #32
0
def TOPHAT_SINGLE(cursor,conn,specise,ref,samples,intype,folder,report,rec):
    """Register single-end TopHat outputs and return one command per sample.

    The output directory of a sample is ``<folder>/<rec>_<sample>``.  BAM
    path and add_files() method text are stored in the ``files`` table; the
    plain commands are returned.  One commit follows the loop.
    """
    script = "/data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/scripts/TOPHAT.single.sh"
    record_sql = "replace into files (sample,type,path,method)values(%s,%s,%s,%s)"
    commands = []
    for sample in samples:
        tag = rec+'_'+sample
        sample_dir = folder+'/'+tag
        source = d00.get_sample_file(cursor,sample,intype)
        bam_prefix = sample_dir+'/BAM'+tag
        bam_path = bam_prefix+'.bam'
        command = 'bash '+script+' '+sample_dir+' '+source \
                +' '+refall[specise]['bowtie2'][ref]+" "+bam_prefix+" "+tag+" "+report
        recorded = add_files(command,script)
        cursor.execute(record_sql,[sample,rec,bam_path,recorded])
        commands.append(command)
    conn.commit()
    return commands
예제 #33
0
def read_self_mate_map_data_and_trim(cursor, conn, sample, species, DB_NAME,
                                     tablename, table_raw):
    """Insert the mapped reads of three alignment files of ``sample`` into ``tablename``.

    Reads registered as 'self_refseq' and 'mate_refseq' yield rows with the
    constants 'T', 1000 and '101M'.  Reads from 'mate_genome' yield the read
    name plus SAM columns 3, 4 and 6, the last one limited to 20 characters;
    for species 'mm10' the reference name gets a 'chr' prefix.

    cursor    -- cursor used for the d00 file-path lookups.
    DB_NAME   -- database for the private connection used for the inserts.
    tablename -- target table (interpolated into the SQL, must be trusted).
    conn, table_raw -- kept for interface compatibility, unused.

    The private connection is closed on every exit path and each samtools
    child process is waited for.
    """

    def mapped_fields(filename):
        # Yield the whitespace-split records samtools prints for mapped
        # reads (-F 4 excludes unmapped ones).
        cmd = 'samtools view -F 4 ' + d00.get_sample_file(
            cursor, sample, filename)
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        try:
            for line in proc.stdout:
                yield line.split()
        finally:
            # Release the pipe and reap the child process.
            proc.stdout.close()
            proc.wait()

    # NOTE(review): hard-coded credentials; consider loading them from
    # configuration.
    conn1 = mb.connect(host="localhost",
                       user="******",
                       passwd="123456",
                       db=DB_NAME)
    try:
        cursor1 = conn1.cursor()
        constant_sources = (('self_refseq', 'S_R'), ('mate_refseq', 'M_R'))
        for filename, tag in constant_sources:
            values = [(t[0], tag, 'T', '101M')
                      for t in mapped_fields(filename)]
            cursor1.executemany(
                "insert into " + tablename + " values(%s,%s,%s,1000,%s) ",
                values)
            conn1.commit()

        values = []
        for t in mapped_fields('mate_genome'):
            chrom = 'chr' + t[2] if species == 'mm10' else t[2]
            # The slice caps the sixth SAM column at 20 characters.
            values.append((t[0], 'M_G', chrom, t[3], t[5][:20]))
        cursor1.executemany(
            "insert into " + tablename + " values(%s,%s,%s,%s,%s) ", values)
        conn1.commit()
        cursor1.close()
    finally:
        conn1.close()
예제 #34
0
def read_VCF_file_single(cursor,conn,DB_NAME,tablename,samples,type):
    """Load read depth and alternate-allele fraction from per-sample VCF files.

    ``tablename`` is created with primary key (chr, pos, Alt) and the columns
    ``<sample>_DP`` / ``<sample>_alt`` for every sample; "EXISTS" is printed
    when the creation fails.  Each sample's VCF (file type ``type``) is then
    parsed and, for records whose INFO field has both ``DP`` and ``DP4``,
    the row (chr, pos, Ref, Alt, DP, (DP4[2] + DP4[3]) / DP) is upserted.

    Records are skipped when they are header lines ('#' prefix), have fewer
    than eight fields, have a Ref or Alt longer than 30 characters, have a
    malformed DP4, or report DP as zero.

    ``cursor`` and ``conn`` are closed at the end; ``DB_NAME`` is unused.
    """
    limit = 30  # width of the Ref/Alt varchar columns
    sample_infos = ''
    for sample in samples:
        sample_infos += " %s_DP varchar(5) DEFAULT '0',%s_alt float DEFAULT '0'," %(sample,sample)
    sql = """CREATE TABLE %s (
	  `chr` varchar(20) NOT NULL DEFAULT '',
	  `pos` int(11) NOT NULL DEFAULT '0',
	  `Ref` varchar(30) DEFAULT NULL,
	  `Alt` varchar(30) NOT NULL DEFAULT '',
	  %s
	  PRIMARY KEY (`chr`,`pos`,`Alt`)
	) ENGINE=InnoDB DEFAULT CHARSET=latin1""" %(tablename,sample_infos)
    print(sql)
    try:
        cursor.execute(sql)
    except Exception:
        print("EXISTS")
    for sample in samples:
        path = d00.get_sample_file(cursor,sample,type)
        values = []
        with open(path) as vcf:
            for line in vcf:
                # Skip genuine header lines only, not every line that
                # happens to contain a '#'.
                if line.startswith('#'):
                    continue
                t = line.split()
                if len(t) < 8:
                    continue
                info = {}
                for item in t[7].split(';'):
                    parts = item.split('=')
                    if len(parts) > 1:
                        info[parts[0]] = parts[1]
                if 'DP4' not in info or 'DP' not in info:
                    continue
                # Alleles that do not fit the column were always dropped;
                # the former truncation never took effect.
                if len(t[3]) > limit or len(t[4]) > limit:
                    continue
                DP4 = info['DP4'].split(',')
                depth = int(info['DP'])
                if len(DP4) < 4 or depth == 0:
                    continue
                alt_fraction = float(int(DP4[2])+int(DP4[3]))/depth
                values.append((t[0],t[1],t[3],t[4],info['DP'],alt_fraction))
        cmd = "insert into %s (chr,pos,Ref,Alt,%s,%s)values(%%s,%%s,%%s,%%s,%%s,%%s) on duplicate key update %s=values(%s),%s=values(%s)" %(tablename,sample+'_DP',sample+'_alt',sample+'_DP',sample+'_DP',sample+'_alt',sample+'_alt')
        if values:
            print("%s %s" % (cmd,values[0]))
            cursor.executemany(cmd,values)
        else:
            print(cmd)
        conn.commit()
    cursor.close()
    conn.close()
예제 #35
0
def MAPPED_SINGLE(cursor, conn, samples, bamtype, folder, rec):
    """Build mapped_reads_single.py commands and record their outputs.

    For each sample the output ``<folder>/<rec>_<sample>.fa.gz`` is stored
    in the ``files`` table with the add_files() method text, while the plain
    command is collected for the caller.  The commit happens once after the
    loop.
    """
    extractor = "/data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/assembly/mapped_reads_single.py"
    jobs = []
    for sample in samples:
        in_bam = d00.get_sample_file(cursor, sample, bamtype)
        out_fa = folder + '/' + rec + '_' + sample + '.fa.gz'
        shell_cmd = ('python ' + extractor + ' ' + in_bam + ' ' + out_fa +
                     ' ' + sample)
        provenance = add_files(shell_cmd, extractor)
        row = [sample, rec, out_fa, provenance]
        cursor.execute(
            "replace into files (sample,type,path,method)values(%s,%s,%s,%s)",
            row)
        jobs.append(shell_cmd)
    conn.commit()
    return jobs
예제 #36
0
def makes_anchors_fq(cursor,conn,samples,bamfile,outdir,length,insert,rec):
    """Register both anchor FASTQ files per sample and return the commands still to run.

    The anchor files are ``<outdir>/Anchor.<insert>_<sample>.1.fq.gz`` and
    ``.2.fq.gz``; they are recorded in the ``files`` table under the types
    ``rec[0]`` and ``rec[1]`` with the make_anchors.py command as method.
    A command is returned only when the first anchor file does not exist
    yet ("ALREADY EXISTS" is printed otherwise).

    The command is now built for every sample.  Before, an existing anchor
    file left it undefined (first sample) or recorded the previous sample's
    command.
    """
    sql = []
    cmds = []
    for sample in samples:
        anchor1 = outdir+'/Anchor.'+insert+'_'+sample+'.1.fq.gz'
        anchor2 = outdir+'/Anchor.'+insert+'_'+sample+'.2.fq.gz'
        bam_pe = d00.get_sample_file(cursor,sample,bamfile)
        cmd = 'python /data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/cRNA/make_anchors.py %s %s %s %s' %(bam_pe,anchor1,anchor2,length)
        if os.path.exists(anchor1):
            print("ALREADY EXISTS")
        else:
            cmds.append(cmd)
        sql.append([sample,rec[0],anchor1,cmd])
        sql.append([sample,rec[1],anchor2,cmd])
    if sql:
        cursor.executemany("replace into files (sample,type,path,method)values(%s,%s,%s,%s)",sql)
    conn.commit()
    return cmds
예제 #37
0
def CUFFLINKS(cursor, conn, species, ref, samples, intype, folder, rec):
    """Build cufflinks commands and record the expected tracking files.

    A directory ``<folder>/<rec>_<sample>`` is created for each sample when
    it does not exist.  The ``genes.fpkm_tracking`` path inside it and the
    command (using refall[species]['gtf'][ref] as annotation) are stored in
    the ``files`` table.  The commit happens once after the loop.
    """
    jobs = []
    for sample in samples:
        label = rec + '_' + sample
        workdir = folder + '/' + label
        if not os.path.exists(workdir):
            os.mkdir(workdir)
        in_bam = d00.get_sample_file(cursor, sample, intype)
        tracking_file = workdir + '/genes.fpkm_tracking'
        shell_cmd = 'cufflinks -o %s -p 6 -G %s %s' % (
            workdir, refall[species]['gtf'][ref], in_bam)
        row = [sample, rec, tracking_file, shell_cmd]
        cursor.execute(
            "replace into files (sample,type,path,method)values(%s,%s,%s,%s)",
            row)
        jobs.append(shell_cmd)
    conn.commit()
    return jobs
예제 #38
0
def RPKM_DB(cursor, conn, g_t, gene_length, totalreads, samples, intype,
            insert, tablename):
    """Compute per-gene RPKM values per sample and write them to ``tablename``.

    The table is created as a copy of ``gene_length`` with an index on
    ``gene``; "exists" is printed when that fails.  Per sample, a float
    column ``<exp><insert>`` is added, the ``intype`` file is read as
    ``<transcript> <count>`` lines, the counts are summed per gene through
    the ``g_t`` transcript-to-gene mapping, and
    ``count * 1e9 / (total * gene_length)`` is stored for each gene that has
    a length entry.

    totalreads -- name of the sample attribute holding the total read count.

    Table and column names are interpolated into SQL and must be trusted.
    """
    sql = 'create table %s select * from %s' % (tablename, gene_length)
    try:
        cursor.execute(sql)
        conn.commit()
        cursor.execute("create index reci on " + tablename + "(gene);")
        conn.commit()
    except Exception:
        print("exists")
    gt = d00.table_2_dict(cursor, g_t, ['transc', 'gene'])
    gl = d00.table_2_dict(cursor, gene_length, ['gene', 'length'])
    for sample in samples:
        total = int(d00.get_sample_info(cursor, sample, totalreads))
        exp = d00.get_sample_info(cursor, sample, 'exp') + insert
        try:
            cursor.execute("alter table " + tablename + " add " + exp +
                           " float DEFAULT '0'")
        except Exception:
            print("EXISTS_colume")
        count = {}
        with open(d00.get_sample_file(cursor, sample, intype)) as handle:
            for line in handle:
                fields = line.split()
                if len(fields) < 2 or fields[0] not in gt:
                    continue
                gene = gt[fields[0]]
                if gene in gl:
                    count[gene] = count.get(gene, 0) + int(fields[1])
        values = []
        for gene in count:
            rpkm = float(count[gene] * 1000000 * 1000) / (total *
                                                         int(gl[gene]))
            values.append([rpkm, gene])
        # Printing values[1] unconditionally raised IndexError when fewer
        # than two genes were counted.
        if len(values) > 1:
            print("%d %s" % (len(values), values[1]))
        else:
            print("%d" % len(values))
        if values:
            cursor.executemany(
                "update " + tablename + " set " + exp + "=%s where gene = %s",
                values)
        conn.commit()
예제 #39
0
def TOPHAT_SINGLE(cursor, conn, specise, ref, samples, intype, folder, report,
                  rec):
    """Build TOPHAT.single.sh commands and record their BAM outputs.

    Each sample uses the directory ``<folder>/<rec>_<sample>``; its BAM path
    is stored in the ``files`` table with the add_files() method text, and
    the plain command is collected for the caller.  The commit happens once
    after the loop.
    """
    runner = "/data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/scripts/TOPHAT.single.sh"
    jobs = []
    for sample in samples:
        label = rec + '_' + sample
        workdir = folder + '/' + label
        in_reads = d00.get_sample_file(cursor, sample, intype)
        stem = workdir + '/BAM' + label
        target = stem + '.bam'
        shell_cmd = ('bash ' + runner + ' ' + workdir + ' ' + in_reads + ' ' +
                     refall[specise]['bowtie2'][ref] + " " + stem + " " +
                     label + " " + report)
        provenance = add_files(shell_cmd, runner)
        row = [sample, rec, target, provenance]
        cursor.execute(
            "replace into files (sample,type,path,method)values(%s,%s,%s,%s)",
            row)
        jobs.append(shell_cmd)
    conn.commit()
    return jobs
예제 #40
0
def makes_anchors_fq(cursor, conn, samples, bamfile, outdir, length, insert,
                     rec):
    """Record both anchor FASTQ files per sample and return the pending commands.

    Anchor files are named ``<outdir>/Anchor.<insert>_<sample>.1.fq.gz`` and
    ``.2.fq.gz`` and are stored in the ``files`` table as types ``rec[0]``
    and ``rec[1]`` with the make_anchors.py command as method.  The command
    is returned only when the first anchor file is missing; otherwise
    "ALREADY EXISTS" is printed.

    The command is computed for every sample, so an existing anchor file no
    longer leaves it undefined or records another sample's command.
    """
    sql = []
    cmds = []
    for sample in samples:
        anchor1 = outdir + '/Anchor.' + insert + '_' + sample + '.1.fq.gz'
        anchor2 = outdir + '/Anchor.' + insert + '_' + sample + '.2.fq.gz'
        bam_pe = d00.get_sample_file(cursor, sample, bamfile)
        cmd = 'python /data/Analysis/fanxiaoying/project/project01_polyA-RNAseq/modules/cRNA/make_anchors.py %s %s %s %s' % (
            bam_pe, anchor1, anchor2, length)
        if os.path.exists(anchor1):
            print("ALREADY EXISTS")
        else:
            cmds.append(cmd)
        sql.append([sample, rec[0], anchor1, cmd])
        sql.append([sample, rec[1], anchor2, cmd])
    if sql:
        cursor.executemany(
            "replace into files (sample,type,path,method)values(%s,%s,%s,%s)",
            sql)
    conn.commit()
    return cmds
예제 #41
0
def COMMAND_generator(cursor,conn,samples,template,infiles,folder,suffix,rec):
    """Expand a command template for every sample and record the outputs.

    Placeholders in ``template``: ``#0`` .. ``#k-1`` are replaced by the
    sample's files of the types listed in ``infiles`` (looked up via d00),
    ``#k`` by the output path ``<folder>/<rec>_<sample><suffix>`` and
    ``#sample`` by the sample name.  When ``suffix`` is '/', the output is a
    directory: the trailing slash is dropped and the directory is created if
    missing.

    Unless ``rec`` is '' or 'NA', the output path and the expanded command
    are stored in the ``files`` table under type ``rec``.  One commit is
    issued after all samples.  Returns the expanded commands.

    Placeholders are substituted from the highest index down, so that
    ``#1`` can no longer clobber the prefix of ``#10``, ``#11``, ...
    """
    cmds = []
    for sample in samples:
        lists = [d00.get_sample_file(cursor,sample,i) for i in infiles]
        path = folder+'/'+rec+'_'+sample+suffix
        if suffix == '/':
            path = path[:-1]
            if not os.path.exists(path):
                os.mkdir(path)
        lists.append(path)
        cmd = template
        for i in range(len(lists)-1,-1,-1):
            cmd = cmd.replace("#"+str(i),lists[i])
        cmd = cmd.replace("#sample",sample)
        if rec not in ('','NA'):
            cursor.execute("replace into files (sample,type,path,method)values(%s,%s,%s,%s)",[sample,rec,path,cmd])
        cmds.append(cmd)
    conn.commit()
    return cmds
예제 #42
0
def Samples_Bam_Handles(cursor,conn,samples,bamtype):
    """Open the ``bamtype`` file of every sample and return the pysam handles.

    The handles are opened in "rb" mode, in the order of ``samples``.  The
    caller is responsible for closing them.  ``conn`` is not used.
    """
    return [
        pysam.Samfile(d00.get_sample_file(cursor,sample,bamtype), "rb")
        for sample in samples
    ]