Beispiel #1
0
def create_sgr(output_dir, eland_file_path, chr):
    """Write a signal profile for ``chr`` to ``<output_dir>/<chr>.sgr``.

    Every hit read from the Eland file contributes a +1 event at the
    coordinate where its window starts and a -1 event where it stops.
    The events are then visited in ascending coordinate order and the
    running sum is written as one ``SGR`` record per distinct coordinate.

    Window per strand:
      * 'F': from ``coordinate`` to ``coordinate + WINDOW``.
      * 'R': from ``coordinate + len(sequence) - WINDOW`` (clamped to a
        minimum of 1) to ``coordinate + len(sequence)``.

    Hits with any other strand value are skipped.

    Args:
        output_dir: Directory that receives the ``.sgr`` file.
        eland_file_path: Path of the Eland file to read.
        chr: Chromosome name; used for the output file name and handed to
            every ``SGR`` record.  (The name shadows the builtin but is
            kept so keyword callers keep working.)

    NOTE(review): no filtering by chromosome happens here, so presumably
    the Eland file only holds hits for ``chr`` -- confirm with the caller.
    """
    eland = ElandFile(eland_file_path, 'r')
    try:
        with open(os.path.join(output_dir, chr + ".sgr"), 'w') as output:
            # coordinate -> net number of windows opening (+) / closing (-)
            signals = {}
            for hit in eland:
                if hit.strand == 'F':
                    start = int(hit.coordinate)
                    stop = start + WINDOW
                elif hit.strand == 'R':
                    stop = int(hit.coordinate) + len(hit.sequence)
                    start = max(stop - WINDOW, 1)
                else:
                    # Without this branch an unknown strand would be
                    # counted with the previous hit's start/stop (or fail
                    # with a NameError on the very first hit).
                    continue

                signals[start] = signals.get(start, 0) + 1
                signals[stop] = signals.get(stop, 0) - 1

            # sorted() works on Python 2 and 3 alike, unlike keys().sort().
            height = 0
            for coord in sorted(signals):
                height += signals[coord]
                output.write(str(SGR(chr, coord, height)) + "\n")
    finally:
        # Release the input handle even when reading or writing fails.
        eland.close()
def merge_unique_eland(output, mapped_reads_files, mismatches=2):
    """Merge the uniquely mapped reads of several files into one Eland file.

    ``.bam`` and ``.sam`` inputs are delegated to ``convert_bam`` /
    ``convert_sam``.  Every other input is read as an Eland file; the
    reader class is picked from the file name ('multi' -> ElandMultiFile,
    'extended' -> ElandExtendedFile, otherwise ElandFile).  A line is
    copied to the output only when it has exactly one best match and that
    match has at most ``mismatches`` mismatches.

    Args:
        output: Path of the merged Eland file to write.
        mapped_reads_files: Iterable of input file paths.
        mismatches: Maximum number of mismatches allowed for a kept hit.

    Raises:
        Exception: If one of the input paths does not exist.  The check is
            made when the path is reached, so earlier inputs have already
            been merged at that point.
    """
    eland_out = ElandFile(output, 'w')
    try:
        for path in mapped_reads_files:
            if not os.path.exists(path):
                raise Exception("File %s does not exist" % path)
            if path.endswith('.bam'):
                convert_bam(eland_out, path, mismatches)
                continue
            if path.endswith('.sam'):
                convert_sam(eland_out, path, mismatches)
                continue

            if 'multi' in path:
                eland_in = ElandMultiFile(path, 'r')
            elif 'extended' in path:
                eland_in = ElandExtendedFile(path, 'r')
            else:
                eland_in = ElandFile(path, 'r')

            # Dedicated counters: the previous enumerate() index reused the
            # name of the path variable, reported one line too few, and
            # printed the file path itself for an empty input.
            total_lines = 0
            total_passed = 0
            try:
                for line in eland_in:
                    total_lines += 1
                    best_hits = line.best_matches()
                    if len(best_hits) != 1:
                        continue  # Only merge unique hits
                    if best_hits[0].number_of_mismatches() > mismatches:
                        continue
                    total_passed += 1
                    eland_out.write(line.convert_to_eland())
            finally:
                eland_in.close()

            # Single pre-formatted argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("unique eland: total lines %d total passed %d"
                  % (total_lines, total_passed))
    finally:
        # Close the output even when an input is missing or unreadable.
        eland_out.close()
Beispiel #3
0
def create_sgr(output_dir, eland_file_path, chr):
    """Build ``<output_dir>/<chr>.sgr`` from the hits of an Eland file.

    The hits are turned into step events: +1 at the coordinate where a
    hit's window begins, -1 where it ends.  Walking the event coordinates
    in ascending order and accumulating the steps yields the height that
    is written, one ``SGR`` record per distinct coordinate.

    Window per strand:
      * 'F': begins at ``coordinate``, ends at ``coordinate + WINDOW``.
      * 'R': ends at ``coordinate + len(sequence)`` and begins ``WINDOW``
        before that, but never below 1.

    Hits carrying any other strand value are ignored.

    Args:
        output_dir: Directory in which the ``.sgr`` file is created.
        eland_file_path: Path of the Eland file providing the hits.
        chr: Chromosome name; forms the output file name and is passed to
            each ``SGR`` record.  (Shadows the builtin; the name is kept
            for backward compatibility with keyword callers.)

    NOTE(review): hits are not filtered by chromosome here -- presumably
    the input was already split per chromosome; verify against the caller.
    """
    eland = ElandFile(eland_file_path, 'r')
    try:
        with open(os.path.join(output_dir, chr + ".sgr"), 'w') as output:
            # Maps a coordinate to the net count of windows that begin (+)
            # or end (-) there.
            signals = {}
            for hit in eland:
                strand = hit.strand
                if strand == 'F':
                    start = int(hit.coordinate)
                    stop = start + WINDOW
                elif strand == 'R':
                    stop = int(hit.coordinate) + len(hit.sequence)
                    start = max(stop - WINDOW, 1)
                else:
                    # An unrecognised strand must not fall through: start
                    # and stop would still hold the previous hit's values
                    # (or be undefined for the first hit).
                    continue

                signals[start] = signals.get(start, 0) + 1
                signals[stop] = signals.get(stop, 0) - 1

            # sorted() replaces keys() + .sort(), which fails on Python 3
            # where keys() returns a view.
            height = 0
            for coord in sorted(signals):
                height += signals[coord]
                output.write(str(SGR(chr, coord, height)) + "\n")
    finally:
        # The input handle is released on every exit path.
        eland.close()
Beispiel #4
0
def divide_eland_by_chr(eland_file, genome, output_dir=""):
    """Split an Eland file into one output file per chromosome.

    Each line of ``eland_file`` is appended to the per-chromosome file
    returned by ``open_chr_file``; those files are opened on first use and
    cached in a local dict.  Lines whose ``chr_name`` is not a key of
    ``get_chr_mapping(genome)`` are skipped silently.

    Args:
        eland_file: Path of the Eland file to split.
        genome: Genome identifier passed to ``get_chr_mapping`` and
            ``open_chr_file``.
        output_dir: Directory passed on to ``open_chr_file`` for the
            per-chromosome files.
    """
    chr_files = {}
    chr_map = get_chr_mapping(genome)
    eland_in = ElandFile(eland_file, 'r')
    try:
        for line in eland_in:
            if line.chr_name not in chr_map:
                # Not a chromosome known for this genome: skip the line.
                continue
            out = open_chr_file(line.chr_name, chr_files, genome, chr_map, output_dir)
            out.write(line)
    finally:
        # Close the input and every output opened so far, even when the
        # loop above was aborted by an exception.
        try:
            eland_in.close()
        finally:
            for handle in chr_files.values():
                handle.close()
Beispiel #5
0
def divide_eland_by_chr(eland_file, genome, output_dir=""):
    """Distribute the lines of an Eland file over per-chromosome files.

    The chromosome mapping for ``genome`` is obtained from
    ``get_chr_mapping``.  Every input line whose ``chr_name`` is a key of
    that mapping is written to the file that ``open_chr_file`` returns for
    it (opened lazily and cached in a local dict); all other lines are
    dropped without a message.

    Args:
        eland_file: Path of the Eland file to read.
        genome: Genome identifier forwarded to ``get_chr_mapping`` and
            ``open_chr_file``.
        output_dir: Target directory forwarded to ``open_chr_file``.
    """
    chr_files = {}
    chr_map = get_chr_mapping(genome)
    source = ElandFile(eland_file, 'r')
    try:
        for line in source:
            if line.chr_name not in chr_map:
                # Chromosome name unknown for this genome: ignore the line.
                continue
            target = open_chr_file(line.chr_name, chr_files, genome, chr_map,
                                   output_dir)
            target.write(line)
    finally:
        # Release every handle on all exit paths; previously an exception
        # inside the loop left the input and all opened outputs unclosed.
        try:
            source.close()
        finally:
            for opened in chr_files.values():
                opened.close()
Beispiel #6
0
def open_chr_file(chr_name, chr_files, genome, chr_map, output_dir=""):
    """Return the writable Eland file for ``chr_name``, opening it on demand.

    ``chr_files`` serves as the cache: when it already has an entry for
    ``chr_name`` that entry is returned untouched.  Otherwise a new
    ``ElandFile`` named ``<chr_map[chr_name]>_eland.txt`` is opened for
    writing inside ``output_dir``, stored in ``chr_files`` and returned.

    ``genome`` is accepted but not used by this function.
    """
    if chr_name not in chr_files:
        file_name = '%s_eland.txt' % (chr_map[chr_name])
        target_path = os.path.join(output_dir, file_name)
        chr_files[chr_name] = ElandFile(target_path, 'w')
    return chr_files[chr_name]
def merge_unique_eland(output, mapped_reads_files, mismatches=2):
    """Merge the uniquely mapped reads of several files into one Eland file.

    Progress messages are printed along the way.  ``.bam`` / ``.sam``
    inputs are handed to ``convert_bam`` / ``convert_sam``.  Any other
    input is read with a reader chosen from its file name ('multi' ->
    ElandMultiFile, 'extended' -> ElandExtendedFile, otherwise ElandFile),
    and a line is written to the output only when it has exactly one best
    match with at most ``mismatches`` mismatches.

    Args:
        output: Path of the merged Eland file to write.
        mapped_reads_files: Sequence of input file paths (it is joined for
            the initial message and then iterated).
        mismatches: Maximum number of mismatches allowed for a kept hit.

    Raises:
        Exception: If an input path does not exist.  The check happens
            when that path is reached, after earlier inputs were merged.
    """
    # All print calls pass one pre-formatted string so the emitted text is
    # the same under the Python 2 print statement and Python 3's print().
    print("merge_filter %s to %s" % (','.join(mapped_reads_files), output))
    eland_out = ElandFile(output, 'w')
    try:
        for path in mapped_reads_files:
            if not os.path.exists(path):
                raise Exception("File %s does not exist" % path)
            if path.endswith('.bam'):
                convert_bam(eland_out, path, mismatches)
                continue
            if path.endswith('.sam'):
                convert_sam(eland_out, path, mismatches)
                continue

            if 'multi' in path:
                print(" multi eland ...")
                eland_in = ElandMultiFile(path, 'r')
            elif 'extended' in path:
                print(" extended ...")
                eland_in = ElandExtendedFile(path, 'r')
            else:
                print("ElandFile ...")
                eland_in = ElandFile(path, 'r')

            # Separate counters: the former enumerate() index overwrote the
            # path variable, under-reported the line count by one, and
            # showed the file path as the count for an empty input.
            total_lines = 0
            total_passed = 0
            try:
                for line in eland_in:
                    total_lines += 1
                    best_hits = line.best_matches()
                    if len(best_hits) != 1:
                        continue  # Only merge unique hits
                    if best_hits[0].number_of_mismatches() > mismatches:
                        continue
                    total_passed += 1
                    eland_out.write(line.convert_to_eland())
                    # Debug output, emitted for accepted lines only.
                    print("best_hits %s" % (best_hits,))
            finally:
                eland_in.close()

            print("unique eland: total lines %d total passed %d"
                  % (total_lines, total_passed))
    finally:
        # Close the output even when an input is missing or unreadable.
        eland_out.close()