# Command-line driver (visible portion): parse the options, build a
# Fastq_Reader with a new hash of (h_size, k_size), call
# rand_kmers_for_wheel() and set_wheels() on it, delete the temporary
# random_kmers.fastq from the input directory and write the hash partition
# count to <outputdir>/hashParts.txt.
# NOTE(review): `help_message`, the imports and any enclosing
# `if __name__ == "__main__":` guard are outside the visible chunk.
try:
    # "help" is registered so that --help reaches the handler below instead
    # of being rejected as an unknown option.
    opts, args = getopt.getopt(
        sys.argv[1:], 'hi:o:k:s:',
        ["help", "inputdir=", "outputdir=", "kmersize=", "hashsize="])
except getopt.GetoptError:
    # Only option-parsing failures are treated as usage errors.
    print(help_message)
    sys.exit(2)

for opt, arg in opts:
    if opt in ('-h', '--help'):
        print(help_message)
        sys.exit()
    elif opt in ('-i', '--inputdir'):
        inputdir = arg
        # endswith() tolerates an empty argument (indexing [-1] would raise);
        # an empty path is left empty, i.e. relative to the working directory.
        if inputdir and not inputdir.endswith('/'):
            inputdir += '/'
    elif opt in ('-o', '--outputdir'):
        outputdir = arg
        if outputdir and not outputdir.endswith('/'):
            outputdir += '/'
    elif opt in ('-k', '--kmersize'):
        k_size = int(arg)
    elif opt in ('-s', '--hashsize'):
        h_size = int(arg)

hashobject = Fastq_Reader(inputdir, outputdir, new_hash=(h_size, k_size))
total_rand_kmers = k_size * h_size * 2
hashobject.rand_kmers_for_wheel(total_rand_kmers)
hashobject.set_wheels(wheels=1)

# Remove the temporary k-mer file without going through a shell, so paths
# containing spaces or shell metacharacters are handled safely.  A failed
# removal stays non-fatal, as it was with `rm`.
random_kmers_path = inputdir + 'random_kmers.fastq'
try:
    os.remove(random_kmers_path)
except OSError as err:
    sys.stderr.write('could not remove %s: %s\n' % (random_kmers_path, err))

# Floor division keeps the integer result on both Python 2 and Python 3.
with open(outputdir + 'hashParts.txt', 'w') as f:
    f.write('%d\n' % (2**h_size // 10**6 + 1))
help_message = 'usage example: python merge_hashq_files.py -r 3 -i /project/home/hashed_reads/ -o /project/home/hashed_reads/'


def with_trailing_slash(path):
    """Return *path* ending in '/'; an empty path is returned unchanged."""
    if path and not path.endswith('/'):
        return path + '/'
    return path


def sample_prefixes(paths):
    """Return the sorted, de-duplicated sample prefixes of *paths*.

    The prefix of a path is its file name up to, but not including, the
    first '.'; a file name without a '.' is used whole.  The result is
    sorted so that a given file rank selects the same prefix in every
    invocation, regardless of directory listing or set iteration order.
    """
    return sorted(set(os.path.basename(p).split('.', 1)[0] for p in paths))


if __name__ == "__main__":
    # Merge the count fractions of the sample selected by --filerank and
    # save the indices of its nonzero hash bins as <prefix>.nonzero.npy.
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hr:i:o:',
            ["help", "filerank=", "inputdir=", "outputdir="])
    except getopt.GetoptError:
        print(help_message)
        sys.exit(2)

    fr = inputdir = outputdir = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(help_message)
            sys.exit()
        elif opt in ('-r', '--filerank'):
            # The rank is 1-based on the command line.
            fr = int(arg) - 1
        elif opt in ('-i', '--inputdir'):
            inputdir = with_trailing_slash(arg)
        elif opt in ('-o', '--outputdir'):
            outputdir = with_trailing_slash(arg)

    # All three options are required; fail with the usage text rather than
    # a NameError further down.
    if fr is None or inputdir is None or outputdir is None:
        print(help_message)
        sys.exit(2)

    FP = sample_prefixes(glob.glob(os.path.join(inputdir, '*hashq*')))
    # Reject ranks outside 1..len(FP); a rank of 0 would otherwise wrap
    # around and silently select the last prefix.
    if not 0 <= fr < len(FP):
        sys.stderr.write(
            'file rank %d is out of range: found %d sample prefixes in %s\n'
            % (fr + 1, len(FP), inputdir))
        sys.exit(1)
    file_prefix = FP[fr]

    hashobject = Fastq_Reader(inputdir, outputdir)
    H = hashobject.merge_count_fractions(file_prefix)
    # NOTE(review): the cast to uint16 assumes no count exceeds 65535 --
    # confirm against merge_count_fractions.
    H = np.array(H, dtype=np.uint16)
    nz = np.nonzero(H)[0]
    np.save(hashobject.output_path + file_prefix + '.nonzero.npy', nz)
    print('sample %s has %d nonzero elements and %d total observed kmers' % (
        file_prefix, len(nz), H.sum()))
help_message = 'usage example: python assembly_summary.py -i /project/home/'


def with_trailing_slash(path):
    """Return *path* ending in '/'; an empty path is returned unchanged."""
    if path and not path.endswith('/'):
        return path + '/'
    return path


if __name__ == "__main__":
    # Visible portion of the script: parse the options, load the pickled
    # name tables and the sample ids, then open the summary CSV and write
    # its header row.
    try:
        # "help" is registered so that --help reaches the handler below
        # instead of being rejected as an unknown option.
        opts, args = getopt.getopt(sys.argv[1:], 'hi:', ["help", "inputdir="])
    except getopt.GetoptError:
        print(help_message)
        sys.exit(2)

    inputdir = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(help_message)
            sys.exit()
        elif opt in ('-i', '--inputdir'):
            inputdir = with_trailing_slash(arg)

    # --inputdir is required; fail with the usage text rather than a
    # NameError further down.
    if inputdir is None:
        print(help_message)
        sys.exit(2)

    partitions_dir = inputdir + 'read_partitions/'
    hashobject = Fastq_Reader(partitions_dir, partitions_dir)

    # bact_names_path / vir_names_path are defined outside the visible chunk.
    with open(bact_names_path) as f:
        BNames = cPickle.load(f)
    with open(vir_names_path) as f:
        VNames = cPickle.load(f)

    # Flatten every row of the grouping file into one list of sample ids.
    Sample_ids = []
    with open(inputdir + 'lib_estimates/samples_grouped_by_lib.csv') as f:
        reader = csv.reader(f)
        for row in reader:
            Sample_ids.extend(row)

    # f_main is not closed in the visible portion of the script; presumably
    # later code keeps writing rows through writer_main -- TODO confirm and
    # make sure it is closed there.
    f_main = open(inputdir + 'assembly_alignment_summary.csv', 'w')
    writer_main = csv.writer(f_main)
    writer_main.writerow(['partition', 'N50', 'largest contig', 'total bp',
                          'scaffolds', 'top bacterial alignment',
                          'alignment length', 'top viral alignment',
                          'alignment length'])