def get_non_absent_ref_tes(te_gff, absence_bed, sample, out, log):
    """Build reference insertions for TEs that survive the absence filter.

    Runs ``bedtools subtract -A -a te_gff -b absence_bed`` into a temporary
    GFF under ``out``, then turns every remaining GFF record into an
    ``output.Insertion`` whose type is ``"reference"``.

    Args:
        te_gff: Path to the GFF file passed to bedtools as ``-a``.
        absence_bed: Path to the BED file passed to bedtools as ``-b``.
        sample: Sample name embedded in every insertion name.
        out: Directory in which the temporary GFF is written.
        log: Forwarded to ``mccutils.run_command_stdout`` as ``log``.

    Returns:
        List of ``output.Insertion`` objects, one per parsed GFF record.
    """
    insertions = []
    tmp_gff = out + "/tmp.ref_nonabs.gff"
    command = ["bedtools", "subtract", "-A", "-a", te_gff, "-b", absence_bed]
    mccutils.run_command_stdout(command, tmp_gff, log=log)

    try:
        with open(tmp_gff, "r") as gff:
            for line in gff:
                # Drop the line ending so it can never leak into a field
                # (and from there into the insertion name).
                line = line.rstrip("\n")

                # Skip blank lines and comment/directive lines. Only a
                # leading "#" marks a comment; a "#" inside an attribute
                # value must not cause the record to be discarded.
                if not line.strip() or line.startswith("#"):
                    continue

                # Flatten the ";"-separated attributes into extra
                # tab-separated fields so they can be addressed by index.
                split_line = line.replace(";", "\t").split("\t")

                insert = output.Insertion(output.Temp())
                insert.chromosome = split_line[0]
                insert.start = int(split_line[3])
                insert.end = int(split_line[4])
                # Field 9 is the entry following the first attribute;
                # its "key=value" value is used as the TE name
                # (assumes a 9-column GFF whose second attribute names
                # the TE -- TODO confirm against the annotation format).
                insert.name = (
                    split_line[9].split("=")[1]
                    + "|reference|NA|"
                    + sample
                    + "|temp|nonab|"
                )
                insert.strand = split_line[6]
                insert.type = "reference"
                insertions.append(insert)
    finally:
        # Remove the temporary file even when parsing fails.
        mccutils.remove(tmp_gff)

    return insertions
def read_insertion_summary(infile, sample):
    """Parse an insertion summary table into non-reference insertions.

    The first line of ``infile`` is skipped as a header. Every other line
    must split into exactly 14 tab-separated fields and describe a valid
    interval (``end >= start``, ``end > 0``, ``start > -1``); lines that do
    not are reported on stdout and omitted.

    Args:
        infile: Path to the tab-separated insertion summary file.
        sample: Sample name embedded in every insertion name.

    Returns:
        List of ``output.Insertion`` objects of type ``"non-reference"``.
        Names end in ``"sr|"`` when both junctions have support and in
        ``"rp|"`` otherwise.
    """
    malformed_msg = (
        "<TEMP POST> Omitting malformed line from insertion summary results:"
    )
    parsed = []

    with open(infile, "r") as summary:
        for line_index, line in enumerate(summary):
            # The first line is the header.
            if line_index == 0:
                continue

            insert = output.Insertion(output.Temp())
            fields = line.split("\t")
            if len(fields) != 14:
                print(malformed_msg, line)
                continue

            insert.chromosome = fields[0]
            # Start is shifted down by one (presumably 1-based -> 0-based).
            insert.start = int(fields[1]) - 1
            insert.end = int(fields[2])
            insert.family = fields[3]
            insert.name = "".join(
                (insert.family, "|non-reference|", fields[7], "|", sample, "|temp|")
            )
            insert.strand = "-" if "antisense" in fields[4] else "+"

            support = insert.support_info.support
            support['class'].value = fields[5]
            support['variantsupport'].value = int(float(fields[6]))
            support['frequency'].value = float(fields[7])
            support['junction1'].value = int(fields[8])
            support['junction1support'].value = int(fields[9])
            support['junction2'].value = int(fields[10])
            support['junction2support'].value = int(fields[11])
            support['fiveprimesupport'].value = int(float(fields[12]))
            # The last field still carries the line ending.
            support['threeprimesupport'].value = int(
                float(fields[13].replace("\n", ""))
            )
            insert.type = "non-reference"

            # Reject intervals that are inverted or have non-positive bounds.
            if insert.end < insert.start or insert.end <= 0 or insert.start <= -1:
                print(malformed_msg, line)
                continue

            both_junctions_supported = (
                support['junction1support'].value > 0
                and support['junction2support'].value > 0
            )
            if both_junctions_supported:
                # Split-read evidence: use the junction positions as the
                # insertion coordinates.
                insert.start = support['junction1'].value
                insert.end = support['junction2'].value
                insert.name = insert.name + "sr|"
            else:
                # Read-pair evidence only.
                insert.name = insert.name + "rp|"

            parsed.append(insert)

    return parsed