def build_hash_and_pickle(ref_fn, key_length, force_rebuild=False):
    """Return the genome hash for ``ref_fn``, cached on disk as a pickle.

    The cache file name is the reference path with its extension replaced
    by ``_hash_keylength_<key_length>.pkl``. A cached hash is reused only
    when the longest item obtained by iterating it has length
    ``key_length``; otherwise the hash is rebuilt from the reference and
    the cache file is overwritten.

    Args:
        ref_fn: Path to the reference genome file.
        key_length: Key length handed to ``make_genome_hash`` and used to
            validate a cached hash.
        force_rebuild: When true, ignore any existing cache file and
            rebuild the hash.

    Returns:
        The genome hash, either loaded from the cache or freshly built by
        ``make_genome_hash``.
    """
    reference_hash_pkl_fn = '{}_hash_keylength_{}.pkl'.format(
        splitext(ref_fn)[0], key_length)

    if exists(reference_hash_pkl_fn) and not force_rebuild:
        # NOTE(security): unpickling can execute arbitrary code. Only use
        # cache files that this program wrote itself.
        try:
            with open(reference_hash_pkl_fn, 'rb') as pkl_file:
                cached_hash = pickle.load(pkl_file)
        except (EOFError, pickle.UnpicklingError):
            # Truncated or corrupt cache (e.g. an interrupted earlier
            # write): fall through and rebuild it below.
            pass
        else:
            # default=0 lets an empty cached hash fall through to a
            # rebuild instead of raising ValueError from max().
            if max(map(len, cached_hash), default=0) == key_length:
                return cached_hash

    reference = read_reference(ref_fn)
    ref_genome_hash = make_genome_hash(reference, key_length)
    # Close the file deterministically so the cache is fully flushed
    # before anyone tries to read it back.
    with open(reference_hash_pkl_fn, 'wb') as pkl_file:
        pickle.dump(ref_genome_hash, pkl_file)
    return ref_genome_hash
help='File containing a reference genome.') parser.add_argument('-r', '--reads', required=True, dest='reads_file', help='File containg sequencing reads.') parser.add_argument('-o', '--outputFile', required=True, dest='output_file', help='Output file name.') args = parser.parse_args() reference_fn = args.reference_file reads_fn = args.reads_file output_fn = args.output_file input_reads = read_reads(reads_fn) # This will take a while; you can use an array slice for example: # # input_reads = reads[:300] # # to generate some data quickly. reference = read_reference(reference_fn) alignments, reads = trivial_algorithm(input_reads, reference) output_str = pretty_print_aligned_reads_with_ref(reads, alignments, reference) with (open(output_fn, 'w')) as output_file: output_file.write(output_str)