#!/usr/bin/env python
"""Write a tab-separated contig/length table for a reference FASTA.

The file given by --ref is loaded with ``read_fasta``; for every
(name, record) pair it yields, one line ``<name><TAB><len(record)>`` is
written to the file given by --genome.
"""

import argparse

# from libkuleshov.debug import keyboard
from libkuleshov.fastx import read_fasta

##############################################################################

parser = argparse.ArgumentParser(
    description='Write one "<contig><TAB><length>" line per FASTA record.')

# NOTE(review): --ref is left optional because read_fasta's handling of a
# missing path is not visible from this script -- confirm and tighten if it
# cannot accept None.
parser.add_argument('--ref', help='input reference FASTA file')

# Required: open(None, 'w') below would raise TypeError, and only after the
# whole reference had already been loaded. Fail fast with a usage message.
parser.add_argument('--genome', required=True,
                    help='output file receiving the contig/length table')

args = parser.parse_args()

##############################################################################

# Presumably a mapping of contig name -> sequence; only .items() and len()
# of each value are relied on here.
ref_fasta = read_fasta(args.ref)

with open(args.genome, 'w') as genome:
    for ctg, fasta in ref_fasta.items():
        genome.write('%s\t%d\n' % (ctg, len(fasta)))
total = sum(w for c, w in choices) r = random.uniform(0, total) upto = 0 for c, w in choices: if upto + w >= r: return c upto += w print choices assert False, "Shouldn't get here" ############################################################################## random.seed(0) parse_fn = lambda x: int(x[1:].strip().split('|')[1]) ctg_fasta = read_fasta(args.ref, parse=parse_fn) genome_lengths = [sum([len(ctg_fasta[ctg]) for ctg in org_contigs]) for org_contigs in ctg_by_org.values()] organisms = ctg_by_org.keys() total_sequence = sum(genome_lengths) num_reads = int(total_sequence * args.cov / args.len) num_organisms = len(ctg_by_org.keys()) positions_covered = dict() for o in organisms: positions_covered[o] = dict() for ctg in ctg_by_org[o]: positions_covered[o][ctg] = set() fasta = open(args.fasta, 'w')