Exemple #1
0
 def testNbSeq(self):
     """Check that FastaIO.nbSeq() reports 4 sequences for every fixture.

     The three fixture paths are read from the test instance attributes
     tmp_mono_line, tmp_multi_line and tmp_multi_line_gz (presumably
     created in setUp -- not visible from this method).
     """
     expected_count = 4
     fixture_attrs = ("tmp_mono_line", "tmp_multi_line", "tmp_multi_line_gz")
     for attr_name in fixture_attrs:
         # Resolve each attribute lazily so the fixtures are accessed in
         # the same order, and at the same moment, as the checks run.
         fixture_path = getattr(self, attr_name)
         self.assertEqual(expected_count, FastaIO.nbSeq(fixture_path))
    args = parser.parse_args()

    # Logger initialisation
    logging.basicConfig(
        level=logging.DEBUG,
        format=
        '%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s'
    )
    log = logging.getLogger(os.path.basename(__file__))
    log.info(" ".join(sys.argv))
    log.info("Random seed used: {}".format(args.random_seed))

    # Get number of duplications by reads
    log.info("Get duplication count for each read")
    random.seed(args.random_seed)
    nb_reads = FastaIO.nbSeq(args.input_R1)
    if nb_reads < 10000:
        log.error(
            "The number of reads in {} is unsufficient to simulate duplication (found: {} ; expected: {})."
            .format(args.input_R1, nb_reads, 10000))
    nb_occurences = getNbOccur(args.duplication_profile, nb_reads)

    # Write reads
    log.info("Write reads")
    with FastaIO(args.output_R1, "w") as FH_out_R1:
        with FastaIO(args.output_R2, "w") as FH_out_R2:
            with FastaIO(args.input_R1) as FH_in_R1:
                with FastaIO(args.input_R2) as FH_in_R2:
                    for curr_nb_occur, R1, R2 in zip(nb_occurences, FH_in_R1,
                                                     FH_in_R2):
                        description = "dupCount={}".format(curr_nb_occur)