Ejemplo n.º 1
0
if __name__ == "__main__":
    multiprocessing.freeze_support()
    p = multiprocessing.Pool(args.procs)
    manager = multiprocessing.Manager()
    amplicon_file = open(args.ids, "r")
    amplicons = []
    for line in amplicon_file.readlines():
        amplicons.append(line.rstrip())
    if (args.verbose is True):
        print amplicons
    amplicon_file.close()
    # Input file paths taken from `args`.
    fasta_file = args.fasta
    model_file_template = args.temp_model
    model_file_complement = args.comp_model
    # Each model file yields two values, bound here as the k-mer means and
    # the k-mer length for that model.
    model_kmer_means_template,tempkmerlen=process_model_file(model_file_template)
    model_kmer_means_complement,compkmerlen=process_model_file(model_file_complement)
    # The same reference FASTA is processed once per model (T = template,
    # C = complement, going by the argument names).
    kmerhashT = process_ref_fasta_raw(fasta_file,model_kmer_means_template,tempkmerlen)
    kmerhashC = process_ref_fasta_raw(fasta_file,model_kmer_means_complement,compkmerlen)
    seqlengths = get_seq_len(fasta_file)
    # NOTE(review): called without arguments and its result is discarded, so
    # it presumably works on module-level names -- confirm in its definition.
    get_amplicons()

    # Containers and counter initialised empty/zero before the amplicon loop
    # that follows. NOTE: despite its name, ampdict is a list.
    ampdict=[]
    ampstartdict=dict()
    ampenddict=dict()
    counter = 0
    # Dict proxy created through the multiprocessing manager.
    procampres=manager.dict()

    for amplicon in amplicons:
    	counter+=1
    	ampstart = int(float(amplicon.split(':', 1 )[1].split('-',1)[0]))
Ejemplo n.º 2
0
    # Abort early when the directory to watch does not exist.
    if not os.path.isdir(args.watchdir):
        # The space after "Folder" keeps the path from running into the word.
        logger.error("Folder " + args.watchdir + " cannot be found. Exiting.")
        # Exit with a non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    # Multiprocess setup
    # The pool size comes from args.procs.
    p = multiprocessing.Pool(args.procs)
    manager = multiprocessing.Manager()
    # Dict proxy created through the manager.
    procampres = manager.dict()
    fasta_file = args.fasta
    seqlen = get_seq_len(fasta_file)

    # Process model and reference fasta
    model_file = args.temp_model
    # The `global` declarations make the assignment below bind the
    # module-level names rather than locals.
    global model_kmer_means
    global kmer_len
    # process_model_file yields two values, bound here as the k-mer means and
    # the k-mer length.
    model_kmer_means, kmer_len = process_model_file(model_file)
    # process_ref_fasta yields two values, bound as seqids and threedarray.
    seqids, threedarray = process_ref_fasta(fasta_file, model_kmer_means,
                                            kmer_len)

    # Collect filenames: every .fast5 file found directly in the watch
    # directory, then in its "pass" subdirectory, then in "fail".
    d = list()
    filenamecounter = 0
    fast5_patterns = (
        os.path.join(args.watchdir, '*.fast5'),
        os.path.join(args.watchdir, "pass", '*.fast5'),
        os.path.join(args.watchdir, "fail", '*.fast5'),
    )
    for fast5_pattern in fast5_patterns:
        for filename in glob.glob(fast5_pattern):
            filenamecounter += 1
            # One entry per file, bundling the file name with the shared
            # reference data, the result dict, and the arguments.
            d.append([filename, seqids, threedarray, procampres, seqlen, args])
Ejemplo n.º 3
0
    for record in SeqIO.parse(args.fasta, 'fasta'):
        print record.id
        idlist.append(record.id)

    idsconcat = " ".join(idlist)
    for amplicon in amplicons:
        if amplicon.split(':')[0] not in idsconcat:
            print "!** At least one amplicon is not in your reference sequence.\n\r Please check:"
            print amplicon
            print "!** This program will now exit.\n"
            sys.exit()

    # Input file paths taken from `args`.
    fasta_file = args.fasta
    model_file_template = args.temp_model

    # The model file yields two values, bound here as the k-mer means and the
    # k-mer length.
    model_kmer_means_template, kmer_len = process_model_file(
        model_file_template)

    # Process the reference FASTA with the template model values.
    kmerhashT = process_ref_fasta_raw(fasta_file, model_kmer_means_template)

    seqlengths = get_seq_len(fasta_file)
    # NOTE(review): called without arguments and its result is discarded, so
    # it presumably works on names defined elsewhere -- confirm in its definition.
    get_amplicons()

    # Containers and counter initialised empty/zero here.
    # NOTE: despite their names, ampdict and correctedampdict are lists.
    ampdict = []
    ampstartdict = dict()
    ampenddict = dict()
    correctedampdict = []
    correctedampstartdict = dict()
    correctedampenddict = dict()
    counter = 0
    # Dict proxy created through the multiprocessing manager.
    procampres = manager.dict()
Ejemplo n.º 4
0
        print record.id
        idlist.append(record.id)

    idsconcat = " ".join(idlist)
    for amplicon in amplicons:
        if amplicon.split(':')[0] not in idsconcat:
            print "!** At least one amplicon is not in your reference sequence.\n\r Please check:"
            print amplicon
            print "!** This program will now exit.\n"
            sys.exit()


    # Input file paths taken from `args`.
    fasta_file = args.fasta
    model_file_template = args.temp_model

    # The model file yields two values, bound here as the k-mer means and the
    # k-mer length.
    model_kmer_means_template,kmer_len=process_model_file(model_file_template)

    # Process the reference FASTA with the template model values.
    kmerhashT = process_ref_fasta_raw(fasta_file,model_kmer_means_template)

    seqlengths = get_seq_len(fasta_file)
    # NOTE(review): called without arguments and its result is discarded, so
    # it presumably works on names defined elsewhere -- confirm in its definition.
    get_amplicons()

    # Containers and counter initialised empty/zero here.
    # NOTE: despite their names, ampdict and correctedampdict are lists.
    ampdict=[]
    ampstartdict=dict()
    ampenddict=dict()
    correctedampdict=[]
    correctedampstartdict=dict()
    correctedampenddict=dict()
    counter = 0
    # Dict proxy created through the multiprocessing manager.
    procampres=manager.dict()
Ejemplo n.º 5
0
if __name__ == "__main__":
    multiprocessing.freeze_support()
    p = multiprocessing.Pool(args.procs)
    manager = multiprocessing.Manager()
    amplicon_file = open(args.ids, "r")
    amplicons = []
    for line in amplicon_file.readlines():
        amplicons.append(line.rstrip())
    if (args.verbose is True):
        print amplicons
    amplicon_file.close()
    # Input file paths taken from `args`.
    fasta_file = args.fasta
    model_file_template = args.temp_model
    model_file_complement = args.comp_model
    # Each model file yields two values, bound here as the k-mer means and
    # the k-mer length for that model.
    model_kmer_means_template, tempkmerlen = process_model_file(
        model_file_template)
    model_kmer_means_complement, compkmerlen = process_model_file(
        model_file_complement)
    # The same reference FASTA is processed once per model (T = template,
    # C = complement, going by the argument names).
    kmerhashT = process_ref_fasta_raw(fasta_file, model_kmer_means_template,
                                      tempkmerlen)
    kmerhashC = process_ref_fasta_raw(fasta_file, model_kmer_means_complement,
                                      compkmerlen)
    seqlengths = get_seq_len(fasta_file)
    # NOTE(review): called without arguments and its result is discarded, so
    # it presumably works on module-level names -- confirm in its definition.
    get_amplicons()

    # Containers and counter initialised empty/zero here.
    # NOTE: despite its name, ampdict is a list.
    ampdict = []
    ampstartdict = dict()
    ampenddict = dict()
    counter = 0
    # Dict proxy created through the multiprocessing manager.
    procampres = manager.dict()
Ejemplo n.º 6
0
    checkfasta(args.fasta)

    if not os.path.isdir(args.watchdir):
        print "**! Sorry, but the folder " + args.watchdir + " cannot be found.\n\n**!  Please check you have entered the path correctly and try again.\n\n**!  This script will now terminate.\n"
        sys.exit()

    # Worker pool sized from args.procs, plus a manager for shared objects.
    p = multiprocessing.Pool(args.procs)
    manager = multiprocessing.Manager()
    # Dict proxy created through the manager.
    procampres = manager.dict()
    fasta_file = args.fasta
    seqlen = get_seq_len(fasta_file)

    model_file = args.temp_model
    # The `global` declarations make the assignment below bind the
    # module-level names rather than locals.
    global model_kmer_means
    global kmer_len
    # process_model_file yields two values, bound here as the k-mer means and
    # the k-mer length.
    model_kmer_means, kmer_len = process_model_file(model_file)
    # process_ref_fasta yields two values, bound as seqids and threedarray.
    seqids, threedarray = process_ref_fasta(fasta_file, model_kmer_means, kmer_len)
    # NOTE(review): the commented-out statements below are dead code (debug
    # prints and alternative calls); consider removing them.
    # print "init kmerhash",type(kmerhash)

    # print type(threedarray)

    # model_file = args.temp_model
    # model_kmer_means,kmer_len=process_model_file(model_file)

    # kmerhash_subset = process_ref_fasta_subset(fasta_file,model_kmer_means,seqlen,kmer_len)
    # kmerhash_subset = process_ref_fasta(fasta_file,model_kmer_means,seqlen,kmer_len)

    # sys.exit()

    # Empty list and zeroed counter, initialised here for the code that follows.
    d = list()
    filenamecounter = 0