Python remove_alignment_number 예제들, lib.psl_lib.remove_alignment_number Python 예제들

예제 #1

0

파일 보기

 def run(self):
     """
     Classify every transcript by how many other alignments share its source ID.

     Alignment IDs are grouped by the value psl_lib.remove_alignment_number returns for them.
     Each transcript is classified with (group size - 1). When that value is positive, the record
     built by seq_lib.transcript_to_bed (named "<column>_<n>_Copies") is appended to the
     transcript's details. Results are then dumped to disk.
     """
     copies_per_source = Counter()
     for alignment_id, _ in self.alignment_iterator():
         copies_per_source[psl_lib.remove_alignment_number(alignment_id)] += 1
     for aln_id, tx in self.transcript_iterator():
         source_id = psl_lib.remove_alignment_number(aln_id)
         extra_copies = copies_per_source[source_id] - 1
         if extra_copies > 0:
             label = self.column + "_{}_Copies".format(extra_copies)
             record = seq_lib.transcript_to_bed(tx, self.rgb, label)
             self.details_dict[aln_id].append(record)
         self.classify_dict[aln_id] = extra_copies
     self.dump_results_to_disk()

예제 #2

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Flag transcripts whose source ID is shared by more than one alignment.

     IDs in self.transcriptDict are grouped by psl_lib.remove_alignment_number. A transcript in a
     group of size > 1 is classified 1 and gets a details record named "<column>_<n>_Copies",
     where n is the group size minus one; every other transcript is classified 0.
     """
     self.getTranscriptDict()
     group_sizes = Counter(psl_lib.remove_alignment_number(key) for key in self.transcriptDict)
     detailsDict = {}
     classifyDict = {}
     for aId, t in self.transcriptDict.iteritems():
         group_size = group_sizes[psl_lib.remove_alignment_number(aId)]
         if group_size <= 1:
             classifyDict[aId] = 0
             continue
         label = self.column + "_{}_Copies".format(group_size - 1)
         detailsDict[aId] = seq_lib.transcript_to_bed(t, self.rgb, label)
         classifyDict[aId] = 1
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #3

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Flag transcripts that contain a stop codon ("*") before their final codon.

     Codon pairs come from codonPairIterator; the last pair is dropped before scanning. Each target
     codon that translates to "*" adds a details record: coloured self.colors["input"] when the
     query codon is identical, self.rgb otherwise. Transcripts where either CDS length is <= 75
     are skipped entirely; the rest are classified 1 (at least one hit) or 0.
     """
     self.getTranscriptDict()
     self.getAnnotationDict()
     self.getSeqDict()
     self.getRefDict()
     self.getAlignmentDict()
     detailsDict = defaultdict(list)
     classifyDict = {}
     for aId, t in self.transcriptDict.iteritems():
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         aln = self.alignmentDict[aId]
         if a.getCdsLength() <= 75 or t.getCdsLength() <= 75:
             continue
         # TODO: this will miss an inframe stop if it is the last 3 bases that are not the annotated stop.
         # use the logic from EndStop to flag this
         codon_pairs = list(codonPairIterator(a, t, aln, self.seqDict, self.refDict))
         for pos, tgt_codon, qry_codon in codon_pairs[:-1]:
             if seq_lib.codon_to_amino_acid(tgt_codon) != "*":
                 continue
             if tgt_codon == qry_codon:
                 record = seq_lib.cds_coordinate_to_bed(t, pos, pos + 3, self.colors["input"], self.column)
             else:
                 record = seq_lib.cds_coordinate_to_bed(t, pos, pos + 3, self.rgb, self.column)
             detailsDict[aId].append(record)
             classifyDict[aId] = 1
         # transcripts that passed the length filter without any hit are explicit negatives
         classifyDict.setdefault(aId, 0)
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #4

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Flag transcripts whose CDS does not end in one of TAA / TGA / TAG.

     A transcript is classified 1 when any of three mapped CDS positions is None or when the last
     three bases of its CDS are not a stop codon. The details record is coloured
     self.colors["input"] when the annotation's CDS also lacks a terminal stop codon, self.rgb
     otherwise. Transcripts where either CDS length is <= 75 are skipped.
     """
     stopCodons = ('TAA', 'TGA', 'TAG')
     self.getAlignmentDict()
     self.getTranscriptDict()
     self.getAnnotationDict()
     self.getSeqDict()
     self.getRefDict()
     detailsDict = {}
     classifyDict = {}
     for aId, t in self.transcriptDict.iteritems():
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         aln = self.alignmentDict[aId]
         if a.getCdsLength() <= 75 or t.getCdsLength() <= 75:
             continue
         s = t.getCdsLength()
         # NOTE(review): s is the *target* CDS length but indexes the *annotation* CDS, and the range
         # covers s - 4 .. s - 2 rather than the final three bases - confirm this is intended.
         cds_positions = []
         for i in xrange(s - 4, s - 1):
             transcript_pos = a.cds_coordinate_to_transcript(i)
             target_pos = aln.query_coordinate_to_target(transcript_pos)
             cds_positions.append(t.chromosome_coordinate_to_cds(target_pos))
         stop_missing = None in cds_positions or t.get_cds(self.seqDict)[-3:] not in stopCodons
         if not stop_missing:
             classifyDict[aId] = 0
             continue
         # does this problem exist in the reference?
         if a.get_cds(self.refDict)[-3:] not in stopCodons:
             color = self.colors["input"]
         else:
             color = self.rgb
         detailsDict[aId] = seq_lib.cds_coordinate_to_bed(t, s - 3, s, color, self.column)
         classifyDict[aId] = 1
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #5

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self, shortIntronSize=30):
     """
     Flag introns whose donor/acceptor dinucleotides are not a pair listed in self.non_canonical.

     Introns of length <= shortIntronSize are ignored, as are introns that start at or after
     thickStart and stop before thickStop. For each remaining intron the first and last two bases
     are looked up in self.non_canonical (donor -> acceptor); a miss classifies the transcript as 1
     and adds a details record, coloured self.colors["input"] when compareIntronToReference
     returns True and self.rgb otherwise.

     :param shortIntronSize: introns of this length or shorter are not evaluated.
     """
     self.getTranscriptDict()
     self.getSeqDict()
     self.getAlignmentDict()
     self.getAnnotationDict()
     self.getRefDict()
     detailsDict = defaultdict(list)
     classifyDict = {}
     for aId, t in self.transcriptDict.iteritems():
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         aln = self.alignmentDict[aId]
         for intron in t.intronIntervals:
             if len(intron) <= shortIntronSize or (intron.start >= t.thickStart and intron.stop < t.thickStop):
                 continue
             bases = intron.get_sequence(self.seqDict, strand=True)
             donor = bases[:2]
             acceptor = bases[-2:]
             if donor in self.non_canonical and self.non_canonical[donor] == acceptor:
                 continue
             classifyDict[aId] = 1
             # is this a intron that exists in the reference that also has this problem?
             if compareIntronToReference(intron, a, aln, self.non_canonical, self.refDict) is True:
                 record = seq_lib.splice_intron_interval_to_bed(t, intron, self.colors["input"], self.column)
             else:
                 record = seq_lib.splice_intron_interval_to_bed(t, intron, self.rgb, self.column)
             detailsDict[aId].append(record)
         classifyDict.setdefault(aId, 0)
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #6

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Flag transcripts whose CDS does not begin with ATG.

     The first three annotation CDS positions are mapped through the alignment onto the target
     CDS. If any maps to None the transcript is classified 1 with an self.rgb record. Otherwise the
     first three target CDS bases are compared with "ATG"; a mismatch is classified 1 and coloured
     self.colors["input"] when the annotation CDS also does not start with "ATG", else self.rgb.
     """
     self.getTranscriptDict()
     self.getAlignmentDict()
     self.getAnnotationDict()
     self.getSeqDict()
     self.getRefDict()
     detailsDict = {}
     classifyDict = {}
     for aId, t in self.transcriptDict.iteritems():
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         aln = self.alignmentDict[aId]
         # do not include noncoding transcripts or lift-overs that contain less than 25 codons
         if a.getCdsLength() <= 75 or t.getCdsLength() <= 75:
             continue
         cds_positions = []
         for i in xrange(3):
             transcript_pos = a.cds_coordinate_to_transcript(i)
             target_pos = aln.query_coordinate_to_target(transcript_pos)
             cds_positions.append(t.chromosome_coordinate_to_cds(target_pos))
         if None in cds_positions:
             color = self.rgb
         elif t.get_cds(self.seqDict)[:3] == "ATG":
             classifyDict[aId] = 0
             continue
         elif a.get_cds(self.refDict)[:3] != "ATG":
             color = self.colors["input"]
         else:
             color = self.rgb
         detailsDict[aId] = seq_lib.cds_coordinate_to_bed(t, 0, 3, color, self.column)
         classifyDict[aId] = 1
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #7

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Flag transcripts whose first coding exon does not have frame 0.

     The annotation is checked first: if its first coding frame is non-zero the transcript is
     classified 1 with a self.colors["input"] record. Otherwise the target transcript is checked
     the same way and classified 1 with a self.rgb record, or 0 if it is in frame.
     """

     def first_coding_frame_is_nonzero(tx):
         # remove all -1 frames because those are UTR exons
         frames = [frame for frame in tx.exonFrames if frame != -1]
         # the first coding exon is frames[0] when strand is True, frames[-1] when it is False
         return (tx.strand is True and frames[0] != 0) or (tx.strand is False and frames[-1] != 0)

     self.getTranscriptDict()
     self.getAnnotationDict()
     detailsDict = {}
     classifyDict = {}
     for aId, t in self.transcriptDict.iteritems():
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         # do not include noncoding transcripts or lift-overs that contain less than 25 codon
         if a.getCdsLength() <= 75 or t.getCdsLength() <= 75:
             continue
         # is this is a problem in the reference?
         if first_coding_frame_is_nonzero(a):
             classifyDict[aId] = 1
             detailsDict[aId] = seq_lib.cds_coordinate_to_bed(t, 0, 3, self.colors["input"], self.column)
         elif first_coding_frame_is_nonzero(t):
             classifyDict[aId] = 1
             detailsDict[aId] = seq_lib.cds_coordinate_to_bed(t, 0, 3, self.rgb, self.column)
         else:
             classifyDict[aId] = 0
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #8

0

파일 보기

 def alignment_refalignment_transcript_annotation_iterator(self):
     """
     Yield (aln_id, aln, ref_aln, t, a) for every item of alignment_refalignment_transcript_iterator.

     a is looked up in self.annotation_dict under psl_lib.remove_alignment_number(aln_id). The
     annotation dict is loaded on first use if it is still None.
     """
     if self.annotation_dict is None:
         self.get_annotation_dict()
     records = self.alignment_refalignment_transcript_iterator()
     for aln_id, aln, ref_aln, t in records:
         source_id = psl_lib.remove_alignment_number(aln_id)
         yield aln_id, aln, ref_aln, t, self.annotation_dict[source_id]

예제 #9

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self, shortIntronSize=30):
     """
     Flag transcripts that are missing at least one intron of their annotation.

     Each target intron is mapped back through the alignment into annotation chromosome
     coordinates and compared with the annotation's (start, stop) intron set. If any annotation
     intron is absent the transcript is classified 1, and every target intron that is not an
     annotation intron and has length >= shortIntronSize gets a details record. Otherwise it is
     classified 0.

     :param shortIntronSize: minimum intron length for a details record to be emitted.
     """
     self.getAnnotationDict()
     self.getTranscriptDict()
     self.getAlignmentDict()
     detailsDict = defaultdict(list)
     classifyDict = {}
     for aId, aln in self.alignmentDict.iteritems():
         if aId not in self.transcriptDict:
             continue
         t = self.transcriptDict[aId]
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         reference_introns = {(ref_intron.start, ref_intron.stop) for ref_intron in a.intronIntervals}
         mapped_introns = set()
         intron_by_coords = {}
         for tgt_intron in t.intronIntervals:
             query_start = aln.target_coordinate_to_query(tgt_intron.start - 1)
             query_stop = aln.target_coordinate_to_query(tgt_intron.stop)
             coords = (a.transcript_coordinate_to_chromosome(query_start) + 1,
                       a.transcript_coordinate_to_chromosome(query_stop))
             mapped_introns.add(coords)
             intron_by_coords[coords] = tgt_intron
         if not (reference_introns - mapped_introns):
             # every annotation intron was recovered
             classifyDict[aId] = 0
             continue
         classifyDict[aId] = 1
         for coords in mapped_introns - reference_introns:
             novel_intron = intron_by_coords[coords]
             if len(novel_intron) >= shortIntronSize:
                 detailsDict[aId].append(
                     seq_lib.splice_intron_interval_to_bed(t, novel_intron, self.rgb, self.column))
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #10

0

파일 보기

파일: attributes.py 프로젝트: yuzhenpeng/comparativeAnnotator

 def run(self):
     """
     Record, per alignment ID, the .start of its annotation.

     The annotation is looked up in self.annotation_dict under
     psl_lib.remove_alignment_number(aln_id); the mapping is written with
     dump_attribute_results_to_disk.
     """
     self.get_annotation_dict()
     results_dict = {}
     for aln_id, _ in self.transcript_iterator():
         annotation = self.annotation_dict[psl_lib.remove_alignment_number(aln_id)]
         results_dict[aln_id] = annotation.start
     self.dump_attribute_results_to_disk(results_dict)

예제 #11

0

파일 보기

 def alignment_transcript_annotation_iterator(self):
     """
     Yield (aln_id, aln, t, a) for every item of alignment_transcript_iterator, where a is the
     entry of self.annotation_dict keyed by psl_lib.remove_alignment_number(aln_id).

     The annotation dict is loaded on first use if it is still None.
     """
     if self.annotation_dict is None:
         self.get_annotation_dict()
     for aln_id, aln, t in self.alignment_transcript_iterator():
         source_id = psl_lib.remove_alignment_number(aln_id)
         yield aln_id, aln, t, self.annotation_dict[source_id]

예제 #12

0

파일 보기

def main():
    """
    Write one "<aln_id>\\t<comma-joined intron vector>" line per alignment, in sorted ID order.

    Alignments come from args.psl, reference alignments from args.refPsl (keyed by
    psl_lib.remove_alignment_number of the alignment ID) and transcripts from args.gp. Output goes
    to args.outPath.
    """
    args = parse_args()
    aln_dict = psl_lib.get_alignment_dict(args.psl)
    ref_aln_dict = psl_lib.get_alignment_dict(args.refPsl)
    tx_dict = seq_lib.get_transcript_dict(args.gp)
    with open(args.outPath, "w") as outf:
        for aln_id in sorted(aln_dict):
            aln = aln_dict[aln_id]
            ref_aln = ref_aln_dict[psl_lib.remove_alignment_number(aln_id)]
            transcript = tx_dict[aln_id]
            intron_vector = build_intron_vector(aln, ref_aln, transcript, args.fuzz_distance)
            line = "{}\t{}\n".format(aln_id, ",".join(intron_vector))
            outf.write(line)

예제 #13

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self, cdsCutoff=75):
     """
     Flag transcripts with a short CDS.

     Annotations with a CDS shorter than 3 are skipped. If the annotation CDS length is
     <= cdsCutoff the transcript is classified 1 with a self.colors["input"] record; otherwise, if
     the target CDS length is <= cdsCutoff, it is classified 1 with a self.rgb record; otherwise 0.

     :param cdsCutoff: CDS lengths at or below this value count as short.
     """
     self.getTranscriptDict()
     self.getAnnotationDict()
     detailsDict = {}
     classifyDict = {}
     for aId, t in self.transcriptDict.iteritems():
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         # do not include noncoding transcripts
         if a.getCdsLength() < 3:
             continue
         if a.getCdsLength() <= cdsCutoff:
             color = self.colors["input"]
         elif t.getCdsLength() <= cdsCutoff:
             color = self.rgb
         else:
             classifyDict[aId] = 0
             continue
         detailsDict[aId] = seq_lib.transcript_to_bed(t, color, self.column)
         classifyDict[aId] = 1
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #14

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Flag transcripts that contain frame-shifting indels and record the shifted windows.

     For every alignment that has a transcript, the (start, stop, span) triples produced by
     frameShiftIterator are folded into a running frame (cumulative span mod 3). A window opens
     where the running frame was 0 before an indel and closes where it returns to 0 after one.
     Each window becomes a details record; transcripts with at least one frame shift are
     classified 1, those with none 0. Transcripts where either CDS length is <= 75 are skipped.
     """
     self.getAlignmentDict()
     self.getTranscriptDict()
     self.getAnnotationDict()
     detailsDict = defaultdict(list)
     classifyDict = {}
     for aId, aln in self.alignmentDict.iteritems():
         if aId not in self.transcriptDict:
             continue
         t = self.transcriptDict[aId]
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         # do not include noncoding transcripts or lift-overs whose CDS is 75 bases (25 codons) or shorter
         if a.getCdsLength() <= 75 or t.getCdsLength() <= 75:
             continue
         frame_shifts = list(frameShiftIterator(a, t, aln))
         if len(frame_shifts) == 0:
             classifyDict[aId] = 0
             continue
         # transpose the list of triples into three parallel tuples
         indel_starts, indel_stops, spans = zip(*frame_shifts)
         # calculate cumulative frame by adding each span and taking mod 3 - zeroes imply regaining frame
         # note that this code prepends a 0 to the list, offsetting all values by 1. This is useful.
         # (list.append returns None, so "l.append(...) or l" evaluates to the accumulator list itself)
         cumulative_frame = map(lambda x: x % 3, reduce(lambda l, v: (l.append(l[-1] + v) or l), spans, [0]))
         # every start is when a zero existed in the previous spot in cumulative_frame
         windowed_starts = [x for x, y in izip(indel_starts, cumulative_frame) if y == 0 or x == indel_starts[0]]
         # every stop is when a zero exists at this cumulative_frame
         windowed_stops = [x for x, y in izip(indel_stops, cumulative_frame[1:]) if y == 0]
         # sanity check: there is either one stop per start, or exactly one start left unclosed
         assert any([len(windowed_starts) == len(windowed_stops), len(windowed_starts) - 1 == len(windowed_stops)]),\
             (self.genome, self.column, aId)
         # now we need to fix frame and stops - if this shift extends to the end of the transcript, add that stop
         # additionally, if this is a negative strand transcript, flip starts/stops so that start is always < stop
         if len(windowed_stops) < len(windowed_starts) and t.strand is False:
             windowed_stops.append(t.thickStart)
             windowed_stops, windowed_starts = windowed_starts, windowed_stops
         elif len(windowed_stops) < len(windowed_starts):
             windowed_stops.append(t.thickStop)
         elif t.strand is False:
             windowed_stops, windowed_starts = windowed_starts, windowed_stops
         # one details record per out-of-frame window
         for start, stop in izip(windowed_starts, windowed_stops):
             detailsDict[aId].append(seq_lib.chromosome_coordinate_to_bed(t, start, stop, self.rgb, self.column))
         classifyDict[aId] = 1
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #15

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Flag transcripts whose CDS length is not a multiple of 3.

     The record spans thickStart..thickStop and is coloured self.colors["input"] when the
     annotation's CDS length is also not a multiple of 3, self.rgb otherwise. Transcripts where
     either CDS length is <= 75 are skipped.
     """
     self.getAlignmentDict()
     self.getTranscriptDict()
     self.getAnnotationDict()
     detailsDict = {}
     classifyDict = {}
     for aId, t in self.transcriptDict.iteritems():
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         if a.getCdsLength() <= 75 or t.getCdsLength() <= 75:
             continue
         if t.getCdsLength() % 3 == 0:
             classifyDict[aId] = 0
             continue
         # the target is out of frame; colour by whether the annotation shares the problem
         if a.getCdsLength() % 3 != 0:
             color = self.colors["input"]
         else:
             color = self.rgb
         detailsDict[aId] = seq_lib.chromosome_coordinate_to_bed(t, t.thickStart, t.thickStop, color, self.column)
         classifyDict[aId] = 1
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #16

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self, mult3=False):
     """
     Flag transcripts with deletions that fall between thickStart and thickStop.

     Deletions come from deletionIterator(a, t, aln, mult3); only those with
     start >= t.thickStart and stop < t.thickStop are kept. A transcript with at least one kept
     deletion is classified 1 and its details are the list of records; otherwise it is classified 0.

     :param mult3: passed through unchanged to deletionIterator.
     """
     self.getAlignmentDict()
     self.getTranscriptDict()
     self.getAnnotationDict()
     detailsDict = {}
     classifyDict = {}
     for aId, aln in self.alignmentDict.iteritems():
         if aId not in self.transcriptDict:
             continue
         t = self.transcriptDict[aId]
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         # do not include noncoding transcripts or lift-overs that contain less than 25 codon
         if a.getCdsLength() <= 75 or t.getCdsLength() <= 75:
             continue
         coding_deletions = []
         for start, stop, size in deletionIterator(a, t, aln, mult3):
             if start >= t.thickStart and stop < t.thickStop:
                 coding_deletions.append(
                     seq_lib.chromosome_region_to_bed(t, start, stop, self.rgb, self.column))
         if coding_deletions:
             detailsDict[aId] = coding_deletions
             classifyDict[aId] = 1
         else:
             classifyDict[aId] = 0
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #17

0

파일 보기

파일: transmap_analysis.py 프로젝트: yuzhenpeng/comparativeAnnotator

def categorized_plot(cur, highest_cov_dict, genomes, out_path, file_name,
                     biotype, biotype_ids, gencode, query_fn):
    """
    Bar-plot, per genome, the share of biotype_ids whose best alignment is returned by query_fn.

    For each genome g the IDs returned by sql_lib.get_query_ids for query_fn(g, biotype,
    details=False) are kept when they are the first element of some value in highest_cov_dict[g]
    and psl_lib.remove_alignment_number of the ID is in biotype_ids. The count is reported both
    raw and as a percentage of len(biotype_ids).

    A genome with no entries in highest_cov_dict, or an empty biotype_ids, yields a count / share
    of 0 instead of raising IndexError / ZeroDivisionError.

    :param cur: database cursor handed to sql_lib.get_query_ids.
    :param highest_cov_dict: genome -> mapping whose values carry an alignment ID at index 0.
    :param genomes: iterable of genome names to plot.
    :param out_path: forwarded to plot_lib.barplot.
    :param file_name: forwarded to plot_lib.barplot.
    :param biotype: biotype name, used in the query and the plot title.
    :param biotype_ids: container of IDs belonging to the biotype.
    :param gencode: label used in the plot title.
    :param query_fn: callable building the query; its __name__ appears in the title.
    """
    num_biotype_ids = len(biotype_ids)
    results = []
    for g in genomes:
        # taking element 0 of each value directly avoids zip(*values)[0], which fails on an empty mapping
        best_ids = {entry[0] for entry in highest_cov_dict[g].itervalues()}
        query = query_fn(g, biotype, details=False)
        categorized_ids = sql_lib.get_query_ids(cur, query)
        num_categorized = len({
            x
            for x in categorized_ids if x in best_ids
            and psl_lib.remove_alignment_number(x) in biotype_ids
        })
        # percentage of the biotype; defined as 0 when the biotype has no members
        if num_biotype_ids:
            norm = num_categorized / (0.01 * num_biotype_ids)
        else:
            norm = 0.0
        results.append([g, norm, num_categorized])
    title_string = "Proportion of {:,} {} transcripts in {}\ncategorized as {}"
    title_string = title_string.format(num_biotype_ids, biotype, gencode,
                                       query_fn.__name__)
    plot_lib.barplot(results,
                     out_path,
                     file_name,
                     title_string,
                     adjust_y=False)

예제 #18

0

파일 보기

def align(target, g, target_fasta, chunk, ref_fasta, out_path):
    """
    Align each sequence in chunk against its reference sequence with blat and write a summary.

    For every ID in chunk, the target sequence (from target_fasta) and the reference sequence
    (from ref_fasta, keyed by the ID with both alignment-number suffixes removed) are written to
    temporary FASTA files and aligned with blat. One tab-separated row of
    (id, identity, coverage) is produced per ID, with "0" / "0" when blat returns no rows. Rows are
    written to a randomly named .txt file in out_path.

    :param target: job object providing getLocalTempDir().
    :param g: not used by this function.
    :param target_fasta: path opened with Fasta for the target sequences.
    :param chunk: iterable of target sequence IDs.
    :param ref_fasta: path opened with Fasta for the reference sequences.
    :param out_path: directory receiving the result file.
    """
    target_seqs = Fasta(target_fasta)
    ref_seqs = Fasta(ref_fasta)
    rows = []
    for aug_aId in chunk:
        gencode_id = remove_alignment_number(remove_augustus_alignment_number(aug_aId))
        gencode_seq = str(ref_seqs[gencode_id])
        aug_seq = str(target_seqs[aug_aId])
        tmp_aug = os.path.join(target.getLocalTempDir(), "tmp_aug")
        tmp_gencode = os.path.join(target.getLocalTempDir(), "tmp_gencode")
        fastaWrite(tmp_aug, aug_aId, aug_seq)
        fastaWrite(tmp_gencode, gencode_id, gencode_seq)
        cmd = "blat {} {} -out=psl -noHead /dev/stdout".format(tmp_gencode, tmp_aug)
        # NOTE(review): the last three newline-separated pieces of blat's stdout are discarded -
        # confirm which trailing lines this is meant to strip.
        psl_lines = popenCatch(cmd).split("\n")[:-3]
        if not psl_lines:
            rows.append([aug_aId, "0", "0"])
            continue
        psl_rows = [PslRow(line) for line in psl_lines]
        rows.append([str(field) for field in (aug_aId, identity(psl_rows), coverage(psl_rows))])
    result_file = os.path.join(out_path, getRandomAlphaNumericString(10) + ".txt")
    with open(result_file, "w") as outf:
        for row in rows:
            outf.write("\t".join(row) + "\n")

예제 #19

0

파일 보기

파일: classifiers.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Flag codons that differ between target and query but translate to the same amino acid.

     Codon pairs come from codonPairIterator. Each differing pair for which
     seq_lib.codon_to_amino_acid gives equal results adds a details record and classifies the
     transcript as 1; transcripts without such a pair are classified 0. Alignments without a
     transcript, and pairs where either CDS length is <= 75, are skipped.
     """
     self.getTranscriptDict()
     self.getAnnotationDict()
     self.getSeqDict()
     self.getRefDict()
     self.getAlignmentDict()
     detailsDict = defaultdict(list)
     classifyDict = {}
     for aId, aln in self.alignmentDict.iteritems():
         if aId not in self.transcriptDict:
             continue
         t = self.transcriptDict[aId]
         a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         if a.getCdsLength() <= 75 or t.getCdsLength() <= 75:
             continue
         for pos, tgt_codon, qry_codon in codonPairIterator(a, t, aln, self.seqDict, self.refDict):
             if tgt_codon == qry_codon:
                 continue
             if seq_lib.codon_to_amino_acid(tgt_codon) != seq_lib.codon_to_amino_acid(qry_codon):
                 continue
             detailsDict[aId].append(seq_lib.cds_coordinate_to_bed(t, pos, pos + 3, self.rgb, self.column))
             classifyDict[aId] = 1
         classifyDict.setdefault(aId, 0)
     self.dumpValueDicts(classifyDict, detailsDict)

예제 #20

0

파일 보기

파일: align_augustus.py 프로젝트: yuzhenpeng/comparativeAnnotator

def align(target, target_fasta, chunk, ref_fasta, file_tree):
    """
    Align a chunk of target sequences against their reference sequences and write a CSV summary.

    All target sequences (from target_fasta) and their reference sequences (from ref_fasta, keyed
    by the target ID with both alignment-number suffixes removed) are written to two temporary
    FASTA files, aligned with blat, and chained with simpleChain. For every target ID one row
    "tgt_id,query_id,coverage,identity" is written to a temp file from file_tree; coverage is
    min(coverage, target_coverage) of the best-covered PSL row, and both values are "0" when the
    ID has no PSL row.

    Each row carries the query ID derived from *its own* target ID. Previously the second loop
    reused the variable left over from the last iteration of the FASTA-writing loop, so every row
    reported the last target's query ID.

    :param target: job object providing getGlobalTempDir().
    :param target_fasta: path opened with Fasta for the target sequences.
    :param chunk: re-iterable collection of target sequence IDs.
    :param ref_fasta: path opened with Fasta for the reference sequences.
    :param file_tree: object providing getTempFile() for the output path.
    :raises AssertionError: if no PSL target name matches any ID in chunk.
    """
    g_f = Fasta(target_fasta)
    r_f = Fasta(ref_fasta)
    results = []
    # remember the query ID of every target so the reporting loop below can look it up
    query_ids = {}
    tmp_aug = os.path.join(target.getGlobalTempDir(), "tmp_aug")
    tmp_gencode = os.path.join(target.getGlobalTempDir(), "tmp_gencode")
    tmp_psl = os.path.join(target.getGlobalTempDir(), "tmp_psl")
    with open(tmp_aug, "w") as tmp_aug_h, open(tmp_gencode,
                                               "w") as tmp_gencode_h:
        for tgt_id in chunk:
            query_id = remove_augustus_alignment_number(tgt_id)
            query_ids[tgt_id] = query_id
            gencode_id = remove_alignment_number(query_id)
            gencode_seq = str(r_f[gencode_id])
            aug_seq = str(g_f[tgt_id])
            fastaWrite(tmp_aug_h, tgt_id, aug_seq)
            fastaWrite(tmp_gencode_h, gencode_id, gencode_seq)
    system("blat {} {} -out=psl -noHead {}".format(tmp_aug, tmp_gencode,
                                                   tmp_psl))
    r = popenCatch("simpleChain -outPsl {} /dev/stdout".format(tmp_psl))
    # drop the empty string that follows the final newline
    r = r.split("\n")[:-1]
    r_d = defaultdict(list)
    for p in tokenize_stream(r):
        psl = PslRow(p)
        r_d[psl.t_name].append(psl)
    assert len(r_d.viewkeys() & set(chunk)) > 0, (r_d.viewkeys(), set(chunk))
    for tgt_id in chunk:
        query_id = query_ids[tgt_id]
        if tgt_id not in r_d:
            results.append([tgt_id, query_id, "0", "0"])
        else:
            p_list = [[min(x.coverage, x.target_coverage), x.identity]
                      for x in r_d[tgt_id]]
            # sorted(...)[-1] keeps the last row among equal coverages, as before
            best_cov, best_ident = sorted(p_list, key=lambda x: x[0])[-1]
            results.append(map(str, [tgt_id, query_id, best_cov, best_ident]))
    with open(file_tree.getTempFile(), "w") as outf:
        for x in results:
            outf.write("".join([",".join(x), "\n"]))

예제 #21

0

파일 보기

파일: clustering_classifiers.py 프로젝트: davidaray/comparativeAnnotator

# NOTE(review): this snippet interleaves R statements (`<-` assignments, library(), read.csv) with
# Python statements (imports, `with open`); it is a transcript of interactive work and is not
# runnable as a single script in either language.

# (R) close the current graphics device
dev.off()

# (R) mine association rules with minimum support 0.01 and minimum confidence 0.7
rules <- apriori(data.transactions, parameter=list(support=0.01, confidence=0.7))
# NOTE: how to interpret these rules is unclear, but at least the frequency plot is nice


# now I want to re-run this on just Basic set protein_coding. I can't just dump to csv this time.
# (Python) keep only the rows whose source ID is in both the Basic set and the protein_coding set
from scripts.coverage_identity_ok_plots import *
from scripts.consensus import *
attrs = "/cluster/home/ifiddes/mus_strain_data/pipeline_data/comparative/1504/transMap/2015-05-28/data/wgEncodeGencodeAttrsVM4.tsv"
coding_ids = get_all_ids(attrs, biotype="protein_coding")
# the first whitespace-separated field of each line of the Basic-set file is taken as the ID
basic_ids = {x.split()[0] for x in open("/cluster/home/ifiddes/mus_strain_data/pipeline_data/comparative/1504/transMap/2015-05-28/data/wgEncodeGencodeBasicVM4.gp")}
basic_coding = {x for x in basic_ids if x in coding_ids}
tm_cmd = """SELECT AlignmentId,{} FROM main.'C57B6NJ'""".format(",".join(tm_fields))
r = cur.execute(tm_cmd).fetchall()
r_coding = [x for x in r if remove_alignment_number(x[0]) in basic_coding]
# write the filtered rows as CSV with a header row for the R step below
with open("transmap_coding_only.csv", "w") as outf:
    outf.write("AlignmentId," + ",".join(tm_fields) + "\n")
    for x in r_coding:
        outf.write(",".join(map(str, x)) + "\n")


# (R) load the CSV (the string "None" is read as NA, then replaced by 0), coerce every column to
# logical, and hierarchically cluster the columns on binary distance
data <- read.csv("/hive/users/ifiddes/comparativeAnnotator/transmap_coding_only.csv", row.names=1, header=T, na.strings="None")
data[is.na(data)] <- 0
mat <- sapply(as.data.frame(data), as.logical)
mat.t <- t(mat)
library(stats)
d <- dist(mat.t, method="binary")
# NOTE(review): newer R releases call this linkage "ward.D" - confirm against the R version in use
hc <- hclust(d, method="ward")
pdf("transMap_clustered_classifiers_coding_basic.pdf")
plot(hc)

예제 #22

0

파일 보기

def strip_alignment_numbers(aln_id):
    """
    Return aln_id after passing it through remove_augustus_alignment_number and then
    remove_alignment_number, i.e. with both the Augustus and the transMap alignment IDs stripped.
    """
    without_augustus_id = remove_augustus_alignment_number(aln_id)
    return remove_alignment_number(without_augustus_id)

예제 #23

0

파일 보기

파일: attributes.py 프로젝트: yuzhenpeng/comparativeAnnotator

 def run(self):
     """
     Map every alignment ID from transcript_iterator to
     psl_lib.remove_alignment_number(aln_id) and dump the mapping with
     dump_attribute_results_to_disk.
     """
     results_dict = {}
     for aln_id, _ in self.transcript_iterator():
         results_dict[aln_id] = psl_lib.remove_alignment_number(aln_id)
     self.dump_attribute_results_to_disk(results_dict)

예제 #24

0

파일 보기

파일: attributes.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Map every alignment ID to the .chromosome of its annotation (looked up under
     psl_lib.remove_alignment_number of the ID) and dump the mapping with dumpValueDict.
     """
     self.getAnnotationDict()
     self.getAlignmentDict()
     valueDict = {}
     for aId in self.alignmentDict:
         annotation = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         valueDict[aId] = annotation.chromosome
     self.dumpValueDict(valueDict)

예제 #25

0

파일 보기

파일: attributes.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Map every alignment ID to the .transcriptType of its attribute record (looked up under
     psl_lib.remove_alignment_number of the ID) and dump the mapping with dumpValueDict.
     """
     self.getAttributeDict()
     self.getAlignmentDict()
     valueDict = {}
     for aId in self.alignmentDict:
         attributes = self.attributeDict[psl_lib.remove_alignment_number(aId)]
         valueDict[aId] = attributes.transcriptType
     self.dumpValueDict(valueDict)

예제 #26

0

파일 보기

파일: attributes.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Map every alignment ID to psl_lib.remove_alignment_number of that ID and dump the mapping
     with dumpValueDict.
     """
     self.getAlignmentDict()
     valueDict = {}
     for aId in self.alignmentDict:
         valueDict[aId] = psl_lib.remove_alignment_number(aId)
     self.dumpValueDict(valueDict)

예제 #27

0

파일 보기

파일: attributes.py 프로젝트: davidaray/comparativeAnnotator

 def run(self):
     """
     Map every alignment ID to seq_lib.convert_strand of its annotation's .strand (annotation
     looked up under psl_lib.remove_alignment_number of the ID) and dump the mapping with
     dumpValueDict.
     """
     self.getAnnotationDict()
     self.getAlignmentDict()
     valueDict = {}
     for aId in self.alignmentDict:
         annotation = self.annotationDict[psl_lib.remove_alignment_number(aId)]
         valueDict[aId] = seq_lib.convert_strand(annotation.strand)
     self.dumpValueDict(valueDict)