GC_WINDOW_SIZE = GC_SCALE_COUNT[-1]

#
# fragment length distribution
#
# For (non-artificial) paired-end runs, load the empirical fragment length
# model from FRAGLEN_MODEL, discard every fragment length that is not strictly
# greater than READLEN, and build the sampling distribution plus the
# representative FRAGMENT_SIZE from what remains.
if PAIRED_END and not PAIRED_END_ARTIFICIAL:
    # Single-argument parenthesised form prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print('Using empirical fragment length distribution.')

    # SECURITY NOTE: unpickling executes arbitrary code embedded in the file.
    # FRAGLEN_MODEL must only ever point at a trusted model file.
    # The context manager closes the handle even if unpickling fails (the
    # previous bare open() call left it to the garbage collector).
    with open(FRAGLEN_MODEL, 'rb') as fraglen_model_file:
        potential_vals, potential_prob = pickle.load(fraglen_model_file)

    # Keep only (length, probability) pairs whose length exceeds READLEN.
    FRAGLEN_VALS = []
    FRAGLEN_PROB = []
    for i, frag_len in enumerate(potential_vals):
        if frag_len > READLEN:
            FRAGLEN_VALS.append(frag_len)
            FRAGLEN_PROB.append(potential_prob[i])

    # Sanity check: with nothing left after filtering, indexing FRAGLEN_VALS
    # below could only fail with an uninformative IndexError, so report the
    # actual cause instead.
    if not FRAGLEN_VALS:
        raise ValueError(
            'Fragment length model %r contains no fragment lengths greater '
            'than the read length (%s).' % (FRAGLEN_MODEL, READLEN)
        )

    FRAGLEN_DISTRIBUTION = DiscreteDistribution(FRAGLEN_PROB, FRAGLEN_VALS)
    # Representative fragment size: the value at the index returned by
    # mean_ind_of_weighted_list for the retained probabilities.
    FRAGMENT_SIZE = FRAGLEN_VALS[mean_ind_of_weighted_list(FRAGLEN_PROB)]

#
# Indicate not writing FASTQ reads
#
if NO_FASTQ:
    print('Bypassing FASTQ generation...')

"""************************************************ **** HARD-CODED CONSTANTS ************************************************"""

# target window size for read sampling. how many times bigger than read/frag length
WINDOW_TARGET_SCALE = 100
# sub-window size for read sampling windows. this is basically the finest resolution
# that can be obtained for targeted region boundaries and GC% bias