if __name__ == "__main__":
    # Script entry point: start the worker pool, load the amplicon list,
    # build the template/complement k-mer lookups and initialise the
    # per-amplicon bookkeeping containers.

    # Must be called straight after the __main__ guard; per the
    # multiprocessing docs it only has an effect in frozen Windows builds.
    multiprocessing.freeze_support()
    p = multiprocessing.Pool(args.procs)
    manager = multiprocessing.Manager()

    # One amplicon per line. The context manager closes the handle even if
    # reading fails, and blank lines are skipped so they cannot turn into
    # empty amplicon names that break the coordinate parsing below.
    with open(args.ids, "r") as amplicon_file:
        amplicons = [line.rstrip() for line in amplicon_file if line.strip()]
    if args.verbose is True:
        # Single-argument print: same output under Python 2 and Python 3.
        print(amplicons)

    fasta_file = args.fasta
    model_file_template = args.temp_model
    model_file_complement = args.comp_model

    # process_model_file returns a pair, unpacked here as
    # (k-mer means, k-mer length) for each strand model.
    model_kmer_means_template, tempkmerlen = process_model_file(
        model_file_template)
    model_kmer_means_complement, compkmerlen = process_model_file(
        model_file_complement)

    # One lookup per strand model, both built from the same reference FASTA.
    kmerhashT = process_ref_fasta_raw(
        fasta_file, model_kmer_means_template, tempkmerlen)
    kmerhashC = process_ref_fasta_raw(
        fasta_file, model_kmer_means_complement, compkmerlen)
    seqlengths = get_seq_len(fasta_file)
    get_amplicons()

    ampdict = []
    ampstartdict = dict()
    ampenddict = dict()
    counter = 0
    procampres = manager.dict()  # manager-backed dict for sharing results
    for amplicon in amplicons:
        counter += 1
        # The start coordinate is the text between the first ':' and the
        # following '-'. Going through float() accepts values such as
        # "100.0" as well as "100".
        ampstart = int(float(amplicon.split(':', 1)[1].split('-', 1)[0]))
# Abort early when the directory to watch does not exist.
if not os.path.isdir(args.watchdir):
    # A space after "Folder" keeps the path from being glued to the word.
    logger.error("Folder " + args.watchdir + " cannot be found. Exiting.")
    sys.exit()

# Multiprocess setup
p = multiprocessing.Pool(args.procs)
manager = multiprocessing.Manager()
procampres = manager.dict()

fasta_file = args.fasta
seqlen = get_seq_len(fasta_file)

# Process model and reference fasta
model_file = args.temp_model
global model_kmer_means
global kmer_len
model_kmer_means, kmer_len = process_model_file(model_file)
seqids, threedarray = process_ref_fasta(fasta_file, model_kmer_means, kmer_len)

# Scrape filenames: collect every .fast5 file found directly in the watch
# directory and in its "pass" and "fail" subdirectories, in that order.
# Each entry bundles the filename with the shared objects so that one list
# item carries everything needed to process that file.
d = list()
filenamecounter = 0
fast5_patterns = [
    os.path.join(args.watchdir, '*.fast5'),
    os.path.join(args.watchdir, "pass", '*.fast5'),
    os.path.join(args.watchdir, "fail", '*.fast5'),
]
for fast5_pattern in fast5_patterns:
    for filename in glob.glob(fast5_pattern):
        filenamecounter += 1
        d.append([filename, seqids, threedarray, procampres, seqlen, args])
# Collect (and echo) the id of every record in the reference FASTA.
for record in SeqIO.parse(args.fasta, 'fasta'):
    # Single-argument print: same output under Python 2 and Python 3.
    print(record.id)
    idlist.append(record.id)
# Kept in case code further down still reads the joined string.
idsconcat = " ".join(idlist)

# Every amplicon must name a sequence that exists in the reference. The name
# is the text before the first ':'. Compare against the exact set of ids:
# a substring test on the joined string would accept "chr1" when the
# reference only contains "chr10".
known_ids = set(idlist)
for amplicon in amplicons:
    if amplicon.split(':')[0] not in known_ids:
        print("!** At least one amplicon is not in your reference sequence.\n\r Please check:")
        print(amplicon)
        print("!** This program will now exit.\n")
        sys.exit()

fasta_file = args.fasta
model_file_template = args.temp_model

# process_model_file returns a pair, unpacked here as
# (k-mer means, k-mer length) for the template model.
model_kmer_means_template, kmer_len = process_model_file(
    model_file_template)
kmerhashT = process_ref_fasta_raw(fasta_file, model_kmer_means_template)
seqlengths = get_seq_len(fasta_file)
get_amplicons()

# Bookkeeping containers, with a parallel "corrected" set.
ampdict = []
ampstartdict = dict()
ampenddict = dict()
correctedampdict = []
correctedampstartdict = dict()
correctedampenddict = dict()
counter = 0
procampres = manager.dict()  # manager-backed dict for sharing results
# Whitespace-collapsed fragment. The first two statements (print record.id /
# idlist.append(record.id)) use a `record` variable whose loop header is not
# visible here, so the original layout cannot be reconstructed safely.
#
# What the statements do, in order:
#   1. Print each record id and append it to idlist.
#   2. Join all ids with single spaces into idsconcat.
#   3. For every amplicon, take the text before the first ':' and test it
#      against idsconcat; on failure print an error, the offending amplicon
#      and an exit notice, then call sys.exit().
#   4. Load the template model via process_model_file, build kmerhashT with
#      process_ref_fasta_raw, read sequence lengths with get_seq_len and call
#      get_amplicons() (its return value is discarded).
#   5. Initialise the amplicon containers, their "corrected" counterparts,
#      a counter and a manager-backed dict (procampres).
#
# NOTE(review): `not in idsconcat` is a substring test on the joined string,
# so an amplicon named "chr1" passes when the reference only has "chr10", and
# an empty name always passes. Exact membership in idlist is probably what
# was intended; confirm before changing.
# NOTE(review): sys.exit() without an argument exits with status 0 even
# though this is an error path; confirm whether callers rely on that.
print record.id idlist.append(record.id) idsconcat = " ".join(idlist) for amplicon in amplicons: if amplicon.split(':')[0] not in idsconcat: print "!** At least one amplicon is not in your reference sequence.\n\r Please check:" print amplicon print "!** This program will now exit.\n" sys.exit() fasta_file = args.fasta model_file_template = args.temp_model model_kmer_means_template,kmer_len=process_model_file(model_file_template) kmerhashT = process_ref_fasta_raw(fasta_file,model_kmer_means_template) seqlengths = get_seq_len(fasta_file) get_amplicons() ampdict=[] ampstartdict=dict() ampenddict=dict() correctedampdict=[] correctedampstartdict=dict() correctedampenddict=dict() counter = 0 procampres=manager.dict()
if __name__ == "__main__":
    # Script entry point: start the worker pool, load the amplicon list,
    # build the template/complement k-mer lookups and initialise the
    # per-amplicon bookkeeping containers.

    # Must be called straight after the __main__ guard; per the
    # multiprocessing docs it only has an effect in frozen Windows builds.
    multiprocessing.freeze_support()
    p = multiprocessing.Pool(args.procs)
    manager = multiprocessing.Manager()

    # One amplicon per line. The context manager closes the handle even if
    # reading fails, and blank lines are skipped so they do not end up as
    # empty amplicon names.
    with open(args.ids, "r") as amplicon_file:
        amplicons = [line.rstrip() for line in amplicon_file if line.strip()]
    if args.verbose is True:
        # Single-argument print: same output under Python 2 and Python 3.
        print(amplicons)

    fasta_file = args.fasta
    model_file_template = args.temp_model
    model_file_complement = args.comp_model

    # process_model_file returns a pair, unpacked here as
    # (k-mer means, k-mer length) for each strand model.
    model_kmer_means_template, tempkmerlen = process_model_file(
        model_file_template)
    model_kmer_means_complement, compkmerlen = process_model_file(
        model_file_complement)

    # One lookup per strand model, both built from the same reference FASTA.
    kmerhashT = process_ref_fasta_raw(
        fasta_file, model_kmer_means_template, tempkmerlen)
    kmerhashC = process_ref_fasta_raw(
        fasta_file, model_kmer_means_complement, compkmerlen)
    seqlengths = get_seq_len(fasta_file)
    get_amplicons()

    ampdict = []
    ampstartdict = dict()
    ampenddict = dict()
    counter = 0
    procampres = manager.dict()  # manager-backed dict for sharing results
# Run the project's FASTA check on the reference before anything else.
checkfasta(args.fasta)

# Stop straight away when the directory to watch is missing.
if not os.path.isdir(args.watchdir):
    missing_dir_msg = (
        "**! Sorry, but the folder " + args.watchdir +
        " cannot be found.\n\n"
        "**! Please check you have entered the path correctly and try again.\n\n"
        "**! This script will now terminate.\n"
    )
    # Single-argument print: same output under Python 2 and Python 3.
    print(missing_dir_msg)
    sys.exit()

# Worker pool plus a manager-backed dict for sharing results.
p = multiprocessing.Pool(args.procs)
manager = multiprocessing.Manager()
procampres = manager.dict()

fasta_file = args.fasta
seqlen = get_seq_len(fasta_file)

# Load the template model, then derive the reference structures from it.
model_file = args.temp_model
global model_kmer_means
global kmer_len
model_kmer_means, kmer_len = process_model_file(model_file)
seqids, threedarray = process_ref_fasta(
    fasta_file, model_kmer_means, kmer_len)

# Work list and running file count, both starting empty.
d = []
filenamecounter = 0