Ejemplo n.º 1
0
def _windows_for_length(length, window_size, overlap):
	"""Return the ``(start, stop)`` windows laid out on one contig.

	The first window start is half of ``length % window_size``; subsequent
	starts are spaced ``overlap`` apart and every window is ``window_size``
	long.  Floor division (``//``) is used so the bounds stay integers on
	both Python 2 and Python 3 (``range`` rejects floats).

	Raises ``ValueError`` (from ``range``) if ``overlap`` is 0.
	"""
	remainder = length % window_size
	start = remainder // 2

	if remainder > overlap:
		# Fit windows into the remaining space.
		stop = (length - window_size) - overlap // 2
	else:
		# Prevent windows from invading the remaining space.
		stop = length - overlap * 2

	# Build a concrete list: on Python 3 a bare ``zip`` would be a one-shot
	# iterator and could only be consumed once by the caller.
	return [(begin, begin + window_size) for begin in range(start, stop, overlap)]


def generate_slices(args):
	"""Compute the windows to process for every contig of the input VCF.

	Reads ``args.populations``, ``args.input``, ``args.window_size`` and
	``args.overlap``.  Contig lengths are taken from ``vcf.chrm2length``
	after calling ``vcf.set_chrms(args.input)`` (presumably parsed from the
	VCF header -- confirm in the ``VCF`` module).

	Returns a mapping from contig name to a list of ``(start, stop)``
	tuples.  Contigs whose length is not greater than the window size are
	skipped and keep the value ``None``.
	"""
	vcf = VCF.VCF()
	vcf.populations = args.populations
	vcf.set_chrms(args.input)

	# Loop-invariant settings.
	window_size = args.window_size
	overlap = args.overlap

	# Same mapping type and keys as ``chrm2length``, every value None.
	chrm_2_windows = vcf.chrm2length.fromkeys(vcf.chrm2length.keys(), None)

	for chrm in vcf.chrm2length.keys():
		length = vcf.chrm2length[chrm]

		# Skip contigs that are too short to hold a single window.
		if length <= window_size:
			continue

		chrm_2_windows[chrm] = _windows_for_length(length, window_size, overlap)

	return chrm_2_windows
Ejemplo n.º 2
0
def make_dadi_fs(args, region):
	"""Collect polarized per-population allele data for one VCF region.

	Reads ``args.populations`` (a mapping keyed by population id) and
	``args.input``.  ``region`` is unpacked positionally into
	``vcf.slice_vcf(args.input, *region)``.

	Returns ``None`` when ``slice_vcf`` returns ``None``.  Otherwise returns
	``(final_dadi, pop_ids)`` where ``pop_ids`` is the list of population
	ids and ``final_dadi`` maps ``"<CHROM>_<POS>"`` to a dict with the keys
	``'calls'``, ``'context'``, ``'outgroup_context'``, ``'outgroup_allele'``
	and ``'segregating'``.
	"""
	vcf = VCF.VCF()
	vcf.populations = args.populations
	vcf.set_header(args.input)

	# Materialise the ids so the returned value is a plain list rather than
	# a live dict view on Python 3 (on Python 2 ``keys()`` is already a list).
	pop_ids = list(args.populations.keys())

	# Get the slice; nothing to do if the region yielded no data.
	chunk = vcf.slice_vcf(args.input, *region)
	if chunk is None:
		return None

	final_dadi = {}

	for row_count, row in enumerate(count_alleles(chunk, args.populations)):

		raw_calls = chunk[row_count]
		row['outgroups'] = {'ALT': 0, 'REF': 0} # set empty outgroup

		# To Do: Need to create a function to fill outgroup if one is defined.
		# The heliconius dataset, for example, has this.

		# Skip sites whose outgroup is not fixed at one value.  The explicit
		# comparison is kept so a non-bool return value is treated as before.
		if check_outgroup(row) == False:
			continue

		# Skip sites whose REF or ALT entry is longer than one element
		# (treated as multi-allelic by the original author).
		if len(raw_calls['REF']) > 1 or len(raw_calls['ALT']) > 1:
			continue

		# Call base for outgroup.
		outgroup_allele = get_outgroup_base(row, raw_calls)

		# Call major allele (base) for ingroup.
		major_allele = get_ingroup_major_allele(row, raw_calls, outgroup_allele)

		# Polarize REF and ALT for the ingroup: when the major allele is
		# not the VCF REF allele, the two roles are swapped.
		if major_allele != raw_calls['REF']:
			ref, alt = ('ALT', 'REF')
		else:
			ref, alt = ('REF', 'ALT')

		calls = {}
		for pop in pop_ids:
			calls[pop] = (row[pop][ref], row[pop][alt])

		row_id = "{0}_{1}".format(raw_calls['CHROM'], raw_calls['POS'])

		final_dadi[row_id] = {
			'calls': calls,
			'context': make_triplet(major_allele),
			'outgroup_context': make_triplet(outgroup_allele),
			'outgroup_allele': outgroup_allele,
			'segregating': (raw_calls[ref], raw_calls[alt]),
		}

	return (final_dadi, pop_ids)
Ejemplo n.º 3
0
def main(args):
	"""Write the sites of ``args.region`` to ``args.output`` as text rows.

	Reads ``args.populations``, ``args.input``, ``args.region`` and
	``args.output``.  The first line written is the header returned by
	``create_dadi_header(args)``; each retained site then becomes one
	space-separated line laid out as::

	    <major triplet> <outgroup triplet> <ref allele> <ref value per pop>
	    <alt allele> <alt value per pop> <CHROM> <POS>

	The per-population values come from ``count_alleles`` (presumably
	allele counts -- confirm against that function).  Returns ``None``.
	"""
	vcf = VCF.VCF()
	vcf.populations = args.populations
	vcf.set_header(args.input)

	pop_ids = args.populations.keys()

	# Get slice and per-row allele data.
	chunk = vcf.vcf_chunk_2_dadi(args.input, args.populations, *args.region)
	g = count_alleles(chunk, args.populations)

	# Create header row.
	dadi_header = create_dadi_header(args)

	# The context manager guarantees the output is flushed and closed even
	# if processing a row raises.
	with open(args.output, 'w') as fout:
		fout.write(dadi_header + "\n")

		for row_count, row in enumerate(g):

			raw_calls = chunk[row_count]
			row['outgroups'] = {'ALT': 0, 'REF': 0} # set empty outgroup

			# To Do: Need to create a function to fill outgroup if one is defined.
			# The heliconius dataset, for example, has this.

			# Skip sites whose outgroup is not fixed at one value.  The
			# explicit comparison is kept so a non-bool return value is
			# treated as before.
			if check_outgroup(row) == False:
				continue

			# Skip sites whose REF or ALT entry is longer than one element
			# (treated as multi-allelic by the original author).
			if len(raw_calls['REF']) > 1 or len(raw_calls['ALT']) > 1:
				continue

			# Call base for outgroup.
			outgroup_allele = get_outgroup_base(row, raw_calls)

			# Call major allele (base) for ingroup.
			major_allele = get_ingroup_major_allele(row, raw_calls, outgroup_allele)

			# Polarize REF and ALT for the ingroup: when the major allele
			# is not the VCF REF allele, the two roles are swapped.
			if major_allele != raw_calls['REF']:
				ref, alt = ('ALT', 'REF')
			else:
				ref, alt = ('REF', 'ALT')

			# Create the output row.
			dadi_row = [make_triplet(major_allele), make_triplet(outgroup_allele)]

			# For the polarized ref and then alt: the allele itself followed
			# by its value in every population.  The allele is only emitted
			# when there is at least one population.
			for allele_key in (ref, alt):
				per_pop = [row[pop][allele_key] for pop in pop_ids]
				if per_pop:
					dadi_row.append(raw_calls[allele_key])
					dadi_row.extend(per_pop)

			dadi_row.append(raw_calls['CHROM'])
			dadi_row.append(raw_calls['POS'])

			fout.write(" ".join([str(item) for item in dadi_row]) + "\n")