def initialize_containers(args):
    """Build empty per-scaffold bookkeeping containers.

    Every accession yielded by ``fasta_iter`` for the file ``args.scafs``
    gets one entry in each of the two returned dictionaries.

    Args:
        args: Object with a ``scafs`` attribute holding the path of the
            scaffold file to read.

    Returns:
        A tuple ``(true_breakpoints, scaffold_tp_fp)`` where
        ``true_breakpoints`` maps accession -> empty ``set`` and
        ``scaffold_tp_fp`` maps accession -> ``[0, 0]`` (presumably
        true-/false-positive counters, judging by the name -- confirm
        against the callers).
    """
    scaffold_tp_fp = {}
    true_breakpoints = {}
    # Use a context manager so the scaffold file is closed as soon as all
    # records have been consumed instead of leaking the handle.
    with open(args.scafs, 'r') as scaffold_file:
        for acc, _scf in fasta_iter(scaffold_file):
            scaffold_tp_fp[acc] = [0, 0]
            true_breakpoints[acc] = set()
    return true_breakpoints, scaffold_tp_fp
def initialize_containers(args):
    """Create the empty per-scaffold containers used for bookkeeping.

    The accessions are taken from the records that ``fasta_iter`` yields
    for the file at ``args.scafs``; the sequences themselves are ignored.

    Args:
        args: Object exposing ``scafs``, the path of the scaffold file.

    Returns:
        ``(true_breakpoints, scaffold_tp_fp)``: two dictionaries keyed by
        accession. ``true_breakpoints`` holds an empty ``set`` per
        accession and ``scaffold_tp_fp`` holds a fresh ``[0, 0]`` list per
        accession (presumably TP/FP counters -- confirm with the callers).
    """
    # Read all accessions inside a ``with`` block so the file handle is
    # released deterministically rather than left open.
    with open(args.scafs, 'r') as scaffold_file:
        accessions = [acc for acc, _scf in fasta_iter(scaffold_file)]

    # Each accession gets its own list / set object (no shared mutables).
    scaffold_tp_fp = {acc: [0, 0] for acc in accessions}
    true_breakpoints = {acc: set() for acc in accessions}
    return true_breakpoints, scaffold_tp_fp
Exemple #3
0
def main(args):
    """Parse the first record of a genome file and plot its distances.

    Creates ``args.folder_path`` if it does not exist, reads the first
    record that ``fasta.fasta_iter`` yields for ``args.genome``, feeds its
    sequence and ``args.k`` to ``DistanceContainer.parse_genome`` and then
    calls ``plot()`` on the container.

    Args:
        args: Object with ``folder_path``, ``genome`` and ``k`` attributes.

    Raises:
        ValueError: If the genome file yields no records.
    """
    if not os.path.exists(args.folder_path):
        os.mkdir(args.folder_path)

    # Only the first record is used; the file is closed right afterwards.
    seq = None
    with open(args.genome, 'r') as genome_file:
        for _acc, seq in fasta.fasta_iter(genome_file):
            break

    if seq is None:
        # Previously this surfaced as an opaque NameError on ``seq``.
        raise ValueError(
            'no sequences found in genome file: {0}'.format(args.genome))

    dist = DistanceContainer()
    dist.parse_genome(seq, args.k)
    dist.plot()
def main(args):
    """Plot the distance profile of the first sequence in a genome file.

    Ensures ``args.folder_path`` exists, takes the first record produced
    by ``fasta.fasta_iter`` for ``args.genome`` and passes its sequence,
    together with ``args.k``, to a ``DistanceContainer`` which is then
    plotted.

    Args:
        args: Object with ``folder_path``, ``genome`` and ``k`` attributes.

    Raises:
        ValueError: If ``args.genome`` contains no records.
    """
    if not os.path.exists(args.folder_path):
        os.mkdir(args.folder_path)

    with open(args.genome, 'r') as genome_file:
        for _acc, seq in fasta.fasta_iter(genome_file):
            # Only the first record is of interest.
            break
        else:
            # The loop never ran: fail with a clear message instead of the
            # NameError the unbound ``seq`` used to trigger further down.
            raise ValueError(
                'no sequences found in genome file: {0}'.format(args.genome))

    dist = DistanceContainer()
    dist.parse_genome(seq, args.k)
    dist.plot()
Exemple #5
0
def simulate_instance(args):
    """Simulate a paired-end read library and map it to the genome.

    Steps:

    1. Create ``args.output_path`` if needed.
    2. Obtain the genome. Without ``args.contigs`` a ``genome.Genome`` of
       length ``args.genomelen`` is generated and written to ``genome.fa``,
       and the contigs from ``contigs.generate_contigs`` are written in
       shuffled order to ``ctgs.fa`` (both inside ``args.output_path``).
       With ``args.contigs`` the longest record of ``args.genome`` is used
       as the genome sequence.
    3. Simulate paired-end reads with ``reads.DNAseq`` using the insert
       size distribution named by ``args.distr`` (``'normal'``,
       ``'uniform'`` or ``'mix'``; ``'mix'`` combines a normal and a
       uniform library, each built with ``args.coverage / 2``).
    4. Write the reads alternately to ``reads1.fa`` and ``reads2.fa``.
    5. Call ``align.bwa_mem`` on the read files and the genome, with
       ``<output_path>/mapped`` as the output path argument.

    Args:
        args: Parsed options. Attributes read here: ``output_path``,
            ``contigs``, ``genome``, ``genomelen``, ``min_contig``,
            ``max_contig``, ``distr``, ``read_length``, ``coverage``,
            ``mean``, ``sd``, ``min_size`` and ``max_size``. The object is
            also forwarded unchanged to ``align.bwa_mem``.

    Raises:
        ValueError: If ``args.distr`` is not a supported distribution.
    """
    # Fail fast on an unknown distribution; previously this only surfaced
    # as a NameError on ``lib`` after the genome had been generated.
    if args.distr not in ('normal', 'uniform', 'mix'):
        raise ValueError(
            "unsupported read distribution: {0!r} "
            "(expected 'normal', 'uniform' or 'mix')".format(args.distr))

    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    read1_path = os.path.join(args.output_path, 'reads1.fa')
    read2_path = os.path.join(args.output_path, 'reads2.fa')
    bam_path = os.path.join(args.output_path, 'mapped')

    g = genome.Genome([0.25] * 4, args.genomelen, 'genome1')

    if not args.contigs:
        # No contigs supplied: generate genome and contigs from scratch.
        genome_path = os.path.join(args.output_path, 'genome.fa')
        contig_path = os.path.join(args.output_path, 'ctgs.fa')

        g.genome()
        with open(genome_path, 'w') as genome_file:
            # Trailing newline mirrors what ``print >> file`` used to emit.
            genome_file.write('{0}\n'.format(g.genome_fasta_format()))

        ctg_list = list(contigs.generate_contigs(
            g.sequence, args.min_contig, args.max_contig, 0, 3000))
        random.shuffle(ctg_list)
        with open(contig_path, 'w') as contig_file:
            for ctg in ctg_list:
                contig_file.write(ctg)
    else:
        # Contigs supplied: use the longest sequence of the given genome.
        genome_path = args.genome
        longest_seq = 0
        with open(genome_path, 'r') as genome_file:
            for acc, seq in fasta.fasta_iter(genome_file):
                print('{0} {1}'.format(acc, len(seq)))
                if len(seq) > longest_seq:
                    g.sequence = seq
                    g.accession = acc
                    longest_seq = len(seq)
        print('chosen: {0}'.format(g.accession))

    # Simulate the read library.
    if args.distr == 'normal':
        lib = reads.DNAseq(args.read_length, args.coverage,
                           distribution=args.distr,
                           mean=args.mean, stddev=args.sd)
        lib.simulate_pe_reads(g)
    elif args.distr == 'uniform':
        lib = reads.DNAseq(args.read_length, args.coverage,
                           distribution=args.distr,
                           min_size=args.min_size, max_size=args.max_size)
        lib.simulate_pe_reads(g)
    else:  # 'mix'
        lib_part1 = reads.DNAseq(args.read_length, args.coverage / 2,
                                 distribution='normal',
                                 mean=args.mean, stddev=args.sd)
        lib_part1.simulate_pe_reads(g)
        lib_part2 = reads.DNAseq(args.read_length, args.coverage / 2,
                                 distribution='uniform',
                                 min_size=(args.mean - 4 * args.sd),
                                 max_size=(args.mean + 4 * args.sd))
        lib_part2.simulate_pe_reads(g)
        # Concatenate the reads from each distribution into one library.
        lib = reads.DNAseq(args.read_length, args.coverage,
                           distribution=args.distr,
                           mean=args.mean, stddev=args.sd)
        lib.reads = lib_part1.reads + lib_part2.reads

    # Entries alternate between the two output files: even positions go to
    # reads1.fa, odd positions to reads2.fa. Both files are closed before
    # mapping, because ``align.bwa_mem`` is handed their paths and must not
    # see partially flushed data.
    with open(read1_path, 'w') as reads1, open(read2_path, 'w') as reads2:
        for i, read in enumerate(lib.fasta_format()):
            if i % 2 == 0:
                reads1.write(read)
            else:
                reads2.write(read)

    # Map the reads against the genome (not against the contigs).
    align.bwa_mem(read1_path, read2_path, genome_path, bam_path, args)