Exemple #1
0
def build_gene(elements, fasta=None, ref_genes=None):
    """Assemble a Gene from a collection of gene elements.

    One Transcript is created for every exon chain yielded by
    build_transcripts_from_elements, and each transcript is annotated with
    the promoter and poly(A) region returned by the matching helpers.

    Args:
        elements: object exposing id, chrm, strand, tss_exons,
            internal_exons, tes_exons, se_transcripts, introns, promoter
            and polyas.
        fasta: optional sequence source. When given, the gene's transcripts
            are replaced by the result of find_cds_for_gene (called with
            only_longest_orf=True).
        ref_genes: optional reference genes. When given, the gene is passed
            through rename_transcripts.

    Returns:
        The assembled Gene, or None when no transcript could be built.
    """
    # The gene spans the extreme coordinates of the TSS exons, TES exons and
    # single-exon transcripts; materialise the chain once and reuse it.
    boundary_elements = list(chain(
        elements.tss_exons, elements.tes_exons, elements.se_transcripts))
    gene_min = min(min(e) for e in boundary_elements)
    gene_max = max(max(e) for e in boundary_elements)

    transcripts = []
    exon_chains = build_transcripts_from_elements(
        elements.tss_exons, elements.internal_exons, elements.tes_exons,
        elements.se_transcripts, elements.introns, elements.strand)
    for i, exons in enumerate(exon_chains):
        transcript = Transcript(
            "%s_%i" % (elements.id, i), elements.chrm, elements.strand,
            exons, cds_region=None, gene_id=elements.id)
        transcript.promoter = find_matching_promoter_for_transcript(
            transcript, elements.promoter)
        transcript.polya_region = find_matching_polya_region_for_transcript(
            transcript, elements.polyas)
        transcripts.append(transcript)

    if not transcripts:
        return None

    gene = Gene(elements.id, elements.id,
                elements.chrm, elements.strand,
                gene_min, gene_max, transcripts)

    # Identity checks: an argument whose type overrides != must still be
    # recognised as "provided".
    if fasta is not None:
        gene.transcripts = find_cds_for_gene(
            gene, fasta, only_longest_orf=True)

    if ref_genes is not None:
        gene = rename_transcripts(gene, ref_genes)

    return gene
Exemple #2
0
def load_assembled_transcripts(filename, ref_genome):
    """Load transcripts and their exons from a tab-separated annotation file.

    Every "transcript" row starts a new Transcript; the "exon" rows that
    follow are attached to it together with the matching slice of the
    reference sequence. Rows on chromosomes that are missing from
    ``ref_genome.chromosomes_dict`` are ignored.

    Args:
        filename: path of the annotation file.
        ref_genome: object whose ``chromosomes_dict`` maps chromosome names
            to sliceable sequences.

    Returns:
        List of Transcript objects in file order, including the last one in
        the file.
    """
    list_transcripts = []
    # Transcript currently collecting exons; None until a transcript row on
    # a known chromosome has been seen.
    itranscript = None
    with open(filename) as datafile:
        for line in datafile:
            column = line.split('\t')
            if len(column) < 9:
                # Not a full nine-column record (blank line, comment, ...).
                continue
            if column[2] == "transcript":
                # A new transcript row closes the previous transcript. The
                # slot is reset so the same transcript is never stored twice
                # when this row turns out to be on an unknown chromosome.
                if itranscript is not None:
                    list_transcripts.append(itranscript)
                    itranscript = None
                chromosome = column[0]
                if chromosome not in ref_genome.chromosomes_dict:
                    continue
                # Attribute tokens are addressed by position after splitting
                # the attribute column on single spaces.
                transcript_info = column[8].split(' ')
                transcript_id = transcript_info[3].strip()
                gene_id = transcript_info[1].strip()
                fpkm = transcript_info[7].strip(";").strip("\"")
                sign = column[6].strip()
                itranscript = Transcript(
                    transcript_id, gene_id, chromosome, fpkm, sign)
            elif column[2] == "exon":
                if itranscript is None:
                    continue
                if column[0] not in ref_genome.chromosomes_dict:
                    continue
                transcript_info = column[8].split(' ')
                exon_start = int(column[3])
                exon_end = int(column[4])
                if itranscript.id == transcript_info[3]:
                    sequence = ref_genome.chromosomes_dict[itranscript.chromosome]
                    # Coordinates are used as 1-based inclusive, hence the
                    # -1 on the slice start.
                    itranscript.add_exon([exon_start, exon_end],
                                         sequence[exon_start - 1:exon_end])
                else:
                    # Same text the Python 2 print statement produced, in a
                    # form that is valid on both Python 2 and 3.
                    print('WARNING  %s %s' % (transcript_info, itranscript.id))
    # No later transcript row exists to flush the final transcript.
    if itranscript is not None:
        list_transcripts.append(itranscript)
    return list_transcripts
def writeTranscriptOutput(transcripts, outSam, outFa, genome):
    """Write every transcript to the SAM and FASTA output handles.

    Args:
        transcripts: dict mapping transcript id to transcript object.
        outSam: writable handle receiving one printableSAM line per transcript.
        outFa: writable handle receiving one printableFa entry per transcript.
        genome: passed through to Transcript.printableSAM.
    """
    for transcript_id, currTranscript in transcripts.items():
        # Single-argument print call: same output on Python 2 and Python 3.
        print(transcript_id)
        outSam.write(Transcript.printableSAM(currTranscript, genome) + "\n")
        outFa.write(Transcript.printableFa(currTranscript) + "\n")
Exemple #4
0
def _gtf_quoted_value(attributes, key):
    """Return the text between the first pair of double quotes after key."""
    key_index = attributes.find(key)
    value_start = attributes.find('"', key_index) + 1
    value_end = attributes.find('"', value_start)
    return attributes[value_start:value_end]


def _load_gtf_with_coverage(gtf_path):
    """Parse a GTF into Transcripts whose coverage comes from the 'cov' attribute."""
    transcripts = dict()
    with open(gtf_path, 'r') as tsv:
        for line in tsv:
            row = line.strip().split('\t')
            # The attribute column row[8] is read below, so a usable record
            # needs at least nine columns.
            if len(row) < 9:
                continue

            cov = float(_gtf_quoted_value(row[8], 'cov'))
            transcriptId = _gtf_quoted_value(row[8], 'transcript_id')

            if row[2] == 'transcript':
                transcripts[transcriptId] = Transcript(
                    row[0], int(row[3]), int(row[4]), cov, transcriptId)
            elif row[2] == 'exon':
                transcripts[transcriptId].exons.append(
                    (int(row[3]), int(row[4])))
    return transcripts.values()


def compareGTFs(truthGTF, compGTF):
    """Load the truth and comparison GTF files and hand both to compareAll.

    Args:
        truthGTF: path of the GTF holding the reference transcripts.
        compGTF: path of the GTF holding the transcripts to evaluate.
    """
    transcriptsTruth = _load_gtf_with_coverage(truthGTF)
    transcriptsComp = _load_gtf_with_coverage(compGTF)
    compareAll(transcriptsTruth, transcriptsComp)
    def check_level(self, line):
        """Adopt ``line`` as the selected transcript when its 'level' is lower.

        Returns True (and sets ``self.type`` to 'level') when the stored
        transcript was replaced, False otherwise.
        """
        candidate_level = int(line.attrs['level'])
        current_level = int(self.transcript.attrs['level'])
        if candidate_level >= current_level:
            return False

        self.transcript = Transcript(line)
        self.type = 'level'
        return True
 def check_length(self, line):
     """Adopt the transcript built from ``line`` when it is longer.

     Returns True (and sets ``self.type`` to 'length') when the stored
     transcript was replaced, False otherwise.
     """
     candidate = Transcript(line)
     if not (candidate.length > self.transcript.length):
         return False
     self.type = 'length'
     self.transcript = candidate
     return True
def main():
    """Build the Tk window, create the transcription service and run the UI loop."""
    root = tkinter.Tk()
    root.configure(background="black")

    caption_view = tkinter.Text(root, font=('Tiresias', 21))
    caption_view.configure(background='black')
    # One text tag per stability state, each with its own foreground colour.
    for tag_name, colour in (('unstable', 'gray'), ('stable', 'white')):
        caption_view.tag_config(tag_name, foreground=colour)

    model = Transcript()
    # 'nl-NL' is a BCP-47 language tag.
    service = TranscriptionService('nl-NL', model)

    def close_window():
        # Stop the service before tearing the window down.
        service.stop()
        root.destroy()

    root.protocol("WM_DELETE_WINDOW", close_window)

    # Schedule the first UI refresh 50 ms from now.
    caption_view.after(50, updateUI, caption_view, model)
    caption_view.pack()

    stop_button = tkinter.Button(root)
    # Bound to a local so the Switch stays referenced while mainloop runs.
    button_decoration = Switch(stop_button, service)
    stop_button.pack(fill=tkinter.X)

    root.mainloop()
Exemple #8
0
def homework_html_to_LaTeX(file_in, soln=False):
    """Convert one homework HTML file into a LaTeX text file.

    The output name is derived from the tenth '/'-separated component of
    ``file_in`` (minus its last eight characters) plus a suffix that depends
    on ``soln``. The module-level ``week_number`` is set to the part of that
    name preceding 'HTML'.

    Args:
        file_in: path of the homework HTML file, using '/' separators.
        soln: passed to transcribe_problems; also selects the
            "withsoln"/"nosoln" output suffix.
    """
    global week_number
    the_homework = Transcript(file_in)
    soup = BeautifulSoup(the_homework.text, 'html.parser')
    print('souped up!')

    # The two output variants differ only in the file-name suffix.
    suffix = "LaTeXnosoln.txt" if not soln else "LaTeXwithsoln.txt"
    file_name = file_in.strip().split('/')[9][0:-8] + suffix
    output_dir = 'C:/Users/Justin Yan/Documents/Development/Python/AoPSCleanScript/AoPSCleanScript/homework_LaTeX/'

    # The context manager guarantees the output is flushed and closed, even
    # when one of the transcribe steps raises.
    with open(output_dir + file_name, 'w') as file_out:
        print('File opened for writing')

        week_number = file_name.split('HTML')[0]
        transcribe_preamble(soup, file_out)
        # process the problem body
        transcribe_problems(soup, file_out, soln)

        # Raw string: '\e' is not a valid escape sequence.
        file_out.write(r'\end{document}')
Exemple #9
0
def parsePro(filename):
    """Return a dict mapping transcript id (e.g. 0300689) to a Transcript.

    Each usable row of the tab-separated file has a locus of the form
    ``chrom:start-endW...`` in column 1, the transcript id in column 2 and a
    fraction in column 9. Only rows whose fraction exceeds the threshold
    produce an entry.

    Args:
        filename: path of the .pro file.

    Returns:
        Dict of transcript id -> Transcript(chrom, start, end, fraction, id).
    """
    threshold = 0.00005

    transcripts = dict()
    with open(filename, 'r') as f:
        for line in f:
            row = line.strip().split('\t')
            # row[8] is read below, so at least nine columns are required;
            # shorter rows are skipped instead of raising IndexError.
            if len(row) < 9:
                continue

            locus, tag = row[0], row[1]
            sep1 = locus.find(':')
            sep2 = locus.find('-', sep1)
            sep3 = locus.find('W', sep2)
            chrom = locus[:sep1]
            start = int(locus[sep1 + 1:sep2])
            end = int(locus[sep2 + 1:sep3])
            fraction = float(row[8])

            if fraction > threshold:
                transcripts[tag] = Transcript(chrom, start, end, fraction, tag)
    return transcripts
def processSAM(sam, genome):
    """Split a SAM file into its header and two dicts of Transcript objects.

    Header lines (those starting with "@") are collected verbatim. Every
    other line becomes a Transcript; records with FLAG above 16 or with
    CHROM "*" are dropped.

    Returns:
        (header text, canonical transcripts by QNAME,
         noncanonical transcripts by QNAME)
    """
    header_lines = []
    canTranscripts = {}
    noncanTranscripts = {}
    with open(sam, 'r') as f:
        for raw_line in f:
            record = raw_line.strip()
            if record.startswith("@"):
                header_lines.append(record + "\n")
                continue

            t = Transcript(record, genome)
            # FLAG > 16 is what the original author treated as multimapping;
            # CHROM "*" marks an unmapped record.
            if int(t.FLAG) > 16 or t.CHROM == "*":
                continue

            destination = canTranscripts if t.isCanonical == True else noncanTranscripts
            destination[t.QNAME] = t
    return "".join(header_lines), canTranscripts, noncanTranscripts
Exemple #11
0
def _gtf_quoted_value(attributes, key):
    """Return the text between the first pair of double quotes after key."""
    key_index = attributes.find(key)
    value_start = attributes.find('"', key_index) + 1
    value_end = attributes.find('"', value_start)
    return attributes[value_start:value_end]


def _load_gtf_unit_coverage(gtf_path, verbose=False):
    """Parse a GTF into Transcripts that all carry a coverage of 1.

    With verbose=True the feature type of every record and each transcript
    id found are printed.
    """
    transcripts = dict()
    with open(gtf_path, 'r') as tsv:
        for line in tsv:
            row = line.strip().split('\t')
            # The attribute column row[8] is read below, so a usable record
            # needs at least nine columns.
            if len(row) < 9:
                continue

            transcriptId = _gtf_quoted_value(row[8], 'transcript_id')

            if verbose:
                print(row[2])

            if row[2] == 'transcript':
                if verbose:
                    print('Found transcript ' + str(transcriptId))
                transcripts[transcriptId] = Transcript(
                    row[0], int(row[3]), int(row[4]), 1, transcriptId)
            elif row[2] == 'exon' and transcriptId in transcripts:
                transcripts[transcriptId].exons.append(
                    (int(row[3]), int(row[4])))
    return transcripts.values()


def compareGTFs(truthGTF, compGTF):
    """Load the truth and comparison GTF files and hand both to compareAll.

    Exon records whose transcript has not been seen are ignored. Progress is
    printed only while reading the truth file.

    Args:
        truthGTF: path of the GTF holding the reference transcripts.
        compGTF: path of the GTF holding the transcripts to evaluate.
    """
    transcriptsTruth = _load_gtf_unit_coverage(truthGTF, verbose=True)
    transcriptsComp = _load_gtf_unit_coverage(compGTF)
    compareAll(transcriptsTruth, transcriptsComp)
def extract_features(gtfs, feature_types=('exon', 'junction')):
    """Collect the union of annotated features over several GTF files.

    Args:
        gtfs: iterable of GTF inputs, each handed to
            Transcript.extract_features.
        feature_types: names of the feature collections to accumulate.

    Returns:
        Dict mapping each feature type to the set of features found in any
        of the GTFs (an empty set when ``gtfs`` is empty).
    """
    # Built-in set replaces the long-deprecated sets.Set; indentation is
    # spaces only, since mixing tabs and spaces is an error on Python 3.
    annotated_features = {feature_type: set() for feature_type in feature_types}
    for gtf in gtfs:
        features = Transcript.extract_features(gtf)
        for feature_type in feature_types:
            annotated_features[feature_type].update(features[feature_type])
    return annotated_features
Exemple #13
0
def extract_features(gtfs, feature_types=('exon', 'junction')):
    """Merge the features of every GTF into one set per feature type.

    Args:
        gtfs: iterable of GTF inputs, each handed to
            Transcript.extract_features.
        feature_types: names of the feature collections to accumulate.

    Returns:
        Dict keyed by feature type; each value is the set of all features of
        that type seen across ``gtfs``.
    """
    # Built-in set instead of the removed sets.Set class.
    merged = {}
    for kind in feature_types:
        merged[kind] = set()

    for gtf in gtfs:
        per_file = Transcript.extract_features(gtf)
        for kind in feature_types:
            merged[kind].update(per_file[kind])
    return merged
Exemple #14
0
def build_gene(elements, fasta=None, ref_genes=None):
    """Build a Gene, with one Transcript per exon chain, from gene elements.

    Args:
        elements: object exposing id, chrm, strand, tss_exons,
            internal_exons, tes_exons, se_transcripts, introns, promoter
            and polyas.
        fasta: optional sequence source; when given, find_cds_for_gene is
            run on the gene with only_longest_orf=True.
        ref_genes: optional reference genes; when given, the gene is passed
            through rename_transcripts.

    Returns:
        The Gene, or None when build_transcripts_from_elements yields no
        exon chain.
    """
    # Gene bounds are the extreme coordinates of the boundary elements.
    gene_min = min(
        min(e) for e in chain(elements.tss_exons, elements.tes_exons,
                              elements.se_transcripts))
    gene_max = max(
        max(e) for e in chain(elements.tss_exons, elements.tes_exons,
                              elements.se_transcripts))

    transcripts = []
    for i, exons in enumerate(
            build_transcripts_from_elements(elements.tss_exons,
                                            elements.internal_exons,
                                            elements.tes_exons,
                                            elements.se_transcripts,
                                            elements.introns,
                                            elements.strand)):
        transcript = Transcript("%s_%i" % (elements.id, i),
                                elements.chrm,
                                elements.strand,
                                exons,
                                cds_region=None,
                                gene_id=elements.id)
        transcript.promoter = find_matching_promoter_for_transcript(
            transcript, elements.promoter)
        transcript.polya_region = find_matching_polya_region_for_transcript(
            transcript, elements.polyas)
        transcripts.append(transcript)

    if not transcripts:
        return None

    gene = Gene(elements.id, elements.id, elements.chrm, elements.strand,
                gene_min, gene_max, transcripts)

    # "is not None" rather than "!= None": the test must not depend on how
    # the argument's type implements comparison.
    if fasta is not None:
        gene.transcripts = find_cds_for_gene(gene,
                                             fasta,
                                             only_longest_orf=True)

    if ref_genes is not None:
        gene = rename_transcripts(gene, ref_genes)

    return gene
Exemple #15
0
    def use_the_non_NA_transcript_supported(self, line):
        """Resolve the choice when one side has no transcript support level.

        Returns True when ``line`` has support level 'NA' (stored transcript
        kept) or when the stored transcript has 'NA' (``line`` adopted and
        ``self.type`` set). Returns False when neither is 'NA'.
        """
        key = 'transcript_support_level'
        if line.attrs[key] == 'NA':
            return True
        if self.transcript.attrs[key] != 'NA':
            return False

        self.transcript = Transcript(line)
        self.type = 'transcript_support_level'
        return True
Exemple #16
0
 def check_first_transcript(self, line):
     """Classify the first transcript seen.

     A line without the "CCDS" tag is rejected. Otherwise it becomes the
     stored transcript, typed 'MANE_Select' when it carries that tag and
     'only_transcript' when it does not.
     """
     # Not a member of the consensus CDS gene set: reject.
     if "CCDS" not in line.attrs['tags']:
         self.type = 'one_rejected_transcript'
         return

     self.transcript = Transcript(line)
     # Does the transcript belong to the MANE Select data set?
     in_mane_select = 'MANE_Select' in self.transcript.attrs['tags']
     self.type = 'MANE_Select' if in_mane_select else 'only_transcript'
Exemple #17
0
    def check_MANE_dataset(self, line):
        """Prefer whichever transcript carries the 'MANE_Select' tag.

        Returns True with ``self.type`` set to 'MANE_Select' when the stored
        transcript has the tag, or when ``line`` has it (in which case
        ``line`` is adopted). Returns False when neither has it.
        """
        stored_is_mane = 'MANE_Select' in self.transcript.attrs['tags']
        if not stored_is_mane:
            if 'MANE_Select' not in line.attrs['tags']:
                return False
            self.transcript = Transcript(line)

        self.type = 'MANE_Select'
        return True
Exemple #18
0
    def check_CCDS(self, line):
        """Prefer the transcript tagged "CCDS" when only one of the two has it.

        Returns True when exactly one of ``line`` and the stored transcript
        carries the tag (``line`` is adopted, and ``self.type`` set to
        'CCDS', when it is the tagged one). Returns False when both or
        neither carry it.
        """
        line_has_ccds = "CCDS" in line.attrs['tags']
        stored_has_ccds = "CCDS" in self.transcript.attrs['tags']

        if line_has_ccds == stored_has_ccds:
            return False

        if line_has_ccds:
            self.transcript = Transcript(line)
            self.type = 'CCDS'
        return True
Exemple #19
0
    def check_support_level(self, line):
        """Adopt ``line`` when its transcript support level is numerically lower.

        Both support levels are converted with int(). Returns True (and sets
        ``self.type``) when the stored transcript was replaced, else False.
        """
        key = 'transcript_support_level'
        candidate_level = int(line.attrs[key])
        stored_level = int(self.transcript.attrs[key])

        if candidate_level >= stored_level:
            return False

        self.transcript = Transcript(line)
        self.type = 'transcript_support_level'
        return True
Exemple #20
0
def _gtf_quoted_value(attributes, key):
    """Return the text between the first pair of double quotes after key."""
    key_index = attributes.find(key)
    value_start = attributes.find('"', key_index) + 1
    value_end = attributes.find('"', value_start)
    return attributes[value_start:value_end]


def _load_gtf_with_coverage(gtf_path):
    """Parse a GTF into Transcripts whose coverage comes from the 'cov' attribute."""
    transcripts = dict()
    with open(gtf_path, 'r') as tsv:
        for line in tsv:
            row = line.strip().split('\t')
            # The attribute column row[8] is read below, so a usable record
            # needs at least nine columns.
            if len(row) < 9:
                continue

            cov = float(_gtf_quoted_value(row[8], 'cov'))
            transcriptId = _gtf_quoted_value(row[8], 'transcript_id')

            if row[2] == 'transcript':
                transcripts[transcriptId] = Transcript(
                    row[0], int(row[3]), int(row[4]), cov, transcriptId)
            elif row[2] == 'exon':
                transcripts[transcriptId].exons.append(
                    (int(row[3]), int(row[4])))
    return transcripts.values()


def compareGTFs(proFile, truthGTF, compGTF):
    """Compare assembled transcripts against a truth set and call compareAll.

    The truth transcripts come from ``proFile`` (via parsePro); their exons
    are filled in from ``truthGTF``. The comparison transcripts, including
    their coverage, are read from ``compGTF``.

    Args:
        proFile: path of the .pro file (output by flux) listing the truth
            transcripts.
        truthGTF: path of the GTF providing exons for the truth transcripts.
        compGTF: path of the GTF holding the transcripts to evaluate.
    """
    transcriptsTruth = parsePro(proFile)

    with open(truthGTF, 'r') as tsv:
        for line in tsv:
            row = line.strip().split('\t')
            # row[8] is read below, so at least nine columns are needed.
            if len(row) < 9:
                continue

            transcriptId = _gtf_quoted_value(row[8], 'transcript_id')

            # Only exons of transcripts present in the .pro file are kept.
            if row[2] == 'exon' and transcriptId in transcriptsTruth:
                transcriptsTruth[transcriptId].exons.append(
                    (int(row[3]), int(row[4])))
    transcriptsTruth = transcriptsTruth.values()

    transcriptsComp = _load_gtf_with_coverage(compGTF)

    compareAll(transcriptsTruth, transcriptsComp)
Exemple #21
0
    def __init__(self, game, x, y):
        """Create the agent sprite centred on (x, y).

        ``game.all_sprites`` is passed to the Sprite initialiser and the
        three apple images are loaded from ``game.img_folder``.
        """
        self.groups = game.all_sprites
        pg.sprite.Sprite.__init__(self, self.groups)
        self.game = game

        # Artwork keyed by facial expression, loaded in this order.
        image_files = (('normal', "apple_64px.png"),
                       ('blink', "apple_64px_blink.png"),
                       ('wink', "apple_64px_wink.png"))
        self.images = {}
        for expression, file_name in image_files:
            image_path = path.join(game.img_folder, file_name)
            self.images[expression] = pg.image.load(image_path).convert_alpha()

        # Blink animation state.
        self.blinks = False
        self.blink_time = .25
        self.staring_time = 3
        self.start_time = time.time()

        # Geometry. hit_rect is the same object as rect, not a copy.
        self.image = self.images['normal']
        self.rect = self.image.get_rect()
        self.rect.center = (x, y)
        self.hit_rect = self.rect
        self.hit_rect.center = self.rect.center
        self.vel = vec(0, 0)
        self.position = vec(x, y)
        self.dest = vec_dest(x, y)
        self.previous_pos = vec_prev(x, y)

        # Identity and dialogue.
        self.instruction = ""
        self.orientation = "front"  # one of: left, right, front, back
        self.name = "Young Apple"
        self.silence_responses = [
            "can you please say that again?",
            "oops, I missed that. say again?",
            "I heard *silence*",
            "repeat again, please?",
            "could you say again?",
            "I didn't hear that, try again?",
            "I heard *silence*",
        ]
        self.knowledge = Knowledge(self)
        self.transcript = Transcript()

        # Working memory properties
        self.recognized = []
        # current, complete list of action sequences e.g. [[1],[[0],[2]]]
        self.actions = []
        self.input_to_actions = []
        # remaining actions to be completed
        self.action_queue = []
        self.current_action = []
        self.key_used = ""
        self.response = ""
def cleanNoncanonical(transcripts, annotatedJunctions, genome):
    """Try to rescue noncanonical splice junction boundaries.

    Every noncanonical intron bound of every transcript is written to a BED
    file, sorted, and matched to its closest annotated junction with
    pybedtools. Bounds within 5 bp of an annotated junction are passed to
    rescueNoncanonicalJunction; the others are left untouched.

    Args:
        transcripts: dict mapping transcript id to transcript object.
        annotatedJunctions: annotated junctions accepted by BedTool.closest.
        genome: passed through to rescueNoncanonicalJunction.
    """
    # The context manager closes (and flushes) the BED file before it is
    # sorted, even if writing a bound raises.
    with open("tmp_nc.bed", 'w') as o:
        for t in transcripts.values():
            for b in Transcript.getAllIntronBounds(t):
                if b.isCanonical == True:
                    continue
                o.write(IntronBound.getBED(b) + "\n")

    os.system('sort -k1,1 -k2,2n tmp_nc.bed > sorted_tmp_nc.bed')
    nc = pybedtools.BedTool("sorted_tmp_nc.bed")
    # Materialise the result as text before the temporary files are removed.
    jnMatches = str(nc.closest(annotatedJunctions, s=True, D="ref", t="first")).split("\n")

    # Best-effort cleanup, as with the previous "rm" calls: a missing file is
    # not an error.
    for tmp_name in ("tmp_nc.bed", "sorted_tmp_nc.bed", "tmp.bed", "tmp2.bed"):
        try:
            os.remove(tmp_name)
        except OSError:
            pass

    # Iterate over splice junction boundaries and their closest annotated match.
    for match in jnMatches:
        if len(match) == 0:
            continue
        fields = match.split('\t')
        d = int(fields[-1])  # last column is the distance to the match
        transcriptID, spliceJnNum, side = fields[3].split("__")

        # Only rescue junction boundaries within 5 bp of an annotated junction.
        if abs(d) > 5:
            continue

        currTranscript = transcripts[transcriptID]
        currJunction = currTranscript.spliceJunctions[int(spliceJnNum)]
        currIntronBound = currJunction.bounds[int(side)]
        rescueNoncanonicalJunction(currTranscript, currJunction, currIntronBound, d, genome)

    return
Exemple #23
0
def _merged_span(regions):
    """Return (smallest start, largest end) over the non-None regions, or None."""
    present = [region for region in regions if region is not None]
    if not present:
        return None
    return (min(region[0] for region in present),
            max(region[1] for region in present))


def build_merged_transcript(gene_id, clustered_transcripts):
    """Merge a cluster of transcripts into one Transcript.

    The merged transcript takes the exons of the first transcript in the
    cluster, with the outer boundaries widened to cover every member, and
    the union span of the members' promoters and poly(A) regions.

    Args:
        gene_id: gene identifier, also used as prefix of the temporary
            transcript id.
        clustered_transcripts: non-empty sequence of transcripts that share
            the same IB_key().

    Returns:
        The merged Transcript, carrying a random id that is meant to be
        renamed by a later step.
    """
    # find the transcript bounds
    start, stop = 1e20, 0
    for transcript in clustered_transcripts:
        start = min(start, transcript.exons[0][0])
        stop = max(stop, transcript.exons[-1][-1])

    # merge the promoters and the polyas
    new_promoter = _merged_span(t.promoter for t in clustered_transcripts)
    new_polya = _merged_span(t.polya_region for t in clustered_transcripts)

    # choose a template transcript, and make sure that all of the
    # clustered transcripts have the same internal structure (this
    # should be guaranteed by the calling function)
    bt = clustered_transcripts[0]
    assert all(t.IB_key() == bt.IB_key() for t in clustered_transcripts)
    new_exons = list(bt.exons)
    new_exons[0] = (start, new_exons[0][1])
    new_exons[-1] = (new_exons[-1][0], stop)
    # choose a random id - this should be renamed in the next step.
    # randrange needs an int: a float bound such as 1e9 raises TypeError on
    # Python 3.12+.
    new_trans_id = gene_id + "_RNDM_%i" % random.randrange(10**9)
    new_transcript = Transcript(new_trans_id,
                                bt.chrm,
                                bt.strand,
                                new_exons,
                                bt.cds_region,
                                gene_id,
                                name=bt.name,
                                gene_name=bt.gene_name,
                                promoter=new_promoter,
                                polya_region=new_polya)

    return new_transcript
def lect_to_TeX(args):
    """Convert a lecture transcript (HTML) into a LaTeX file.

    Args:
        args: object with ``file_in`` (HTML source), ``file_out`` (output
            path prefix), ``file_name`` (output file name, appended to
            ``file_out``) and ``image_out`` (image path handed to the
            transcribe helpers).
    """
    # file to be read
    file_in = args.file_in
    file_out = args.file_out
    image_path = args.image_out
    file_name = args.file_name

    # Transcript exposes the HTML text; BeautifulSoup makes it easier to
    # traverse.
    the_transcript = Transcript(file_in)
    soup = BeautifulSoup(the_transcript.text, 'html.parser')

    # The context manager guarantees the output is flushed and closed, even
    # when one of the transcribe steps raises.
    with open(file_out + file_name, 'w') as O:
        transcribe_preamble(soup, O, image_path)

        transcribe_msgs(soup, O, image_path)

        O.write(r'\end{document}')
Exemple #25
0
def cli():
    """Run the interactive GPA calculator loop.

    Each pass asks for a transcript CSV, optionally writes the GPA report to
    a JSON or YAML file, dumps the report to stdout as YAML, and asks
    whether to continue.
    """
    banner = ('###########################\n'
              '#   GPA Calculator v0.3   #\n'
              '# Developed by Daanish KS #\n'
              '###########################\n')
    print(banner)

    session = PromptSession()  # Enables file path history for convenience

    yes_answers = {'Y', 'y', 'YES', 'Yes', 'yes'}
    no_answers = {'N', 'n', 'NO', 'No', 'no'}
    # Report-type choice -> output file.
    report_files = {'1': 'gpa_report.json', '2': 'gpa_report.yaml'}

    while True:
        csv_file = session.prompt('Transcript CSV file path: ',
                                  completer=file_completion(),
                                  validator=file_validation(),
                                  validate_while_typing=True)
        x = Transcript(csv_file)

        file_request = prompt('Write GPA report to file [y/n]? ',
                              validator=yes_no_validation(),
                              validate_while_typing=True)

        if file_request in yes_answers:
            report_type = prompt('JSON [1] or YAML [2]? ',
                                 validator=report_type_validation(),
                                 validate_while_typing=True)
            if report_type in report_files:
                x.gpa_report_to_file(file_path=report_files[report_type])

        print()
        yaml.dump(x.gpa_report(round_place=3), sys.stdout)
        print()

        repeat_request = prompt('Continue [y/n]? ',
                                validator=yes_no_validation(),
                                validate_while_typing=True)
        if repeat_request in no_answers:
            break
        print('\n-------------------------\n')
 def transcript(self):
     """Open the 'transcript' URL with the browser and parse it into a Transcript."""
     page = self.browser.open(urls['transcript'])
     parsed = Transcript.from_html(page)
     return parsed
Exemple #27
0
def test_raises_no_mapped_segments(alignments):
    """Building a Transcript from these alignments must raise NoMappedSegmentsError."""
    expected_error = NoMappedSegmentsError
    with pytest.raises(expected_error):
        Transcript(alignments, DEFAULT_SKIP, DEFAULT_MAP)
Exemple #28
0
def test_pre_mRNA_only(flna_annotations, args):
    """The transcript built from ``args`` is annotated as 'pre-mRNA' only."""
    candidate = Transcript(*args['transcript_args'])
    observed = flna_annotations.get_annotations(
        candidate, args['junction_tolerance'])
    assert observed == ['pre-mRNA']
Exemple #29
0
def test_NM_001456_only(flna_annotations, args):
    """The transcript built from ``args`` is annotated as 'NM_001456' only."""
    candidate = Transcript(*args['transcript_args'])
    observed = flna_annotations.get_annotations(
        candidate, args['junction_tolerance'])
    assert observed == ['NM_001456']
Exemple #30
0
 def __init__(self, expression=None, active=False, half_life=False, *args, **kwargs):
     """Initialise the base Transcript, then record the expression attributes.

     ``expression``, ``active`` and ``half_life`` are stored as given; all
     remaining positional and keyword arguments go to Transcript.__init__.
     """
     Transcript.__init__(self, *args, **kwargs)
     self.expression, self.active, self.half_life = expression, active, half_life
Exemple #31
0
import tableproxy
from transcript import Transcript

__all__ = ['Transcript']

# Rebind the public name to the proxy that tableproxy builds around the
# imported Transcript class.
Transcript = tableproxy.getProxy(Transcript)

if __name__ == "__main__":
    # Small demo: a first row of ten (None, None) pairs followed by two
    # utterance rows, imported into a table and printed.
    first_row = [(None, None)] * 10
    demo_rows = [
        first_row,
        ['sw', 0, 1.23, 2.01, 'A', 'male', 'native', 'how are you', 1, 1, 1, 'report'],
        ['sw', 0, 2.01, 2.53, 'B', 'female', 'native', "I'm fine", 1, 1, 2, 'report'],
    ]
    trans = Transcript.importList(demo_rows)
    trans.printTable()
Exemple #32
0
class Agent(pg.sprite.Sprite):
    """
    Sprite that turns spoken instructions into a queue of actions.

    On each call to `update` the agent listens for an instruction, interprets
    it against its Knowledge (lexicon and learned phrases), stores the
    resulting action queue, composes a text response, records the exchange in
    its Transcript and then works through the queued actions.
    """

    def __init__(self, game, x, y):
        """
        Create the agent centred on (x, y) and register it in game.all_sprites.

        game: the owning game object. This class reads all_sprites, img_folder,
              dt, walls, screen, title_font, morgan_speech and goal_completed
              from it.
        x, y: initial centre of the sprite.
        """
        self.groups = game.all_sprites
        pg.sprite.Sprite.__init__(self, self.groups)
        self.game = game
        self.images = {
            'normal': pg.image.load(path.join(game.img_folder, "apple_64px.png")).convert_alpha(),
            'blink': pg.image.load(path.join(game.img_folder, "apple_64px_blink.png")).convert_alpha(),
            'wink': pg.image.load(path.join(game.img_folder, "apple_64px_wink.png")).convert_alpha(),
        }

        # Blink animation state (times are in seconds, compared against time.time()).
        self.blinks = False
        self.blink_time = .25
        self.staring_time = 3
        self.start_time = time.time()

        # Geometry and movement.
        self.image = self.images['normal']
        self.rect = self.image.get_rect()
        self.rect.center = (x, y)
        # NOTE: hit_rect is the very same Rect object as rect until `update`
        # rebinds self.rect, so the assignment below is a no-op at this point.
        self.hit_rect = self.rect
        self.hit_rect.center = self.rect.center
        self.vel = vec(0, 0)
        self.position = vec(x, y)
        self.dest = vec_dest(x, y)
        self.previous_pos = vec_prev(x, y)

        # Dialogue state.
        self.instruction = ""
        self.orientation = "front" # left, right, front, back
        self.name = "Young Apple"
        self.silence_responses = ["can you please say that again?", "oops, I missed that. say again?", "I heard *silence*", "repeat again, please?", "could you say again?", "I didn't hear that, try again?", "I heard *silence*"]
        self.knowledge = Knowledge(self)
        self.transcript = Transcript()

        # Working memory properties
        self.recognized = []
        self.actions = [] # current, complete list of action sequences e.g. [[1],[[0],[2]]]
        self.input_to_actions = []
        self.action_queue = [] # remaining actions to be completed
        self.current_action = []
        self.key_used = ""
        self.response = ""

    def turn(self, direction):
        """
        change the orientation of the agent to a different direction
        (not implemented yet: currently does nothing)
        """
        # self.image.blit(self.img_0/90/180/270, ((x, y)))
        pass

    def give_name(self, new_name):
        """
        Rename the agent and register the new name in the lexicon with the
        same meaning as the existing entry for "you".
        """
        self.name = new_name
        mapped_meaning = self.knowledge.lexicon()["you"]
        self.knowledge.add_to_lexicon(new_name, mapped_meaning)

    def blink(self):
        """
        Changes the apple's image to make the agent blink.

        Switches to the 'blink' image after `staring_time` seconds with the
        normal image, and back to 'normal' after `blink_time` seconds.
        """
        now = time.time()
        elapsed = now - self.start_time
        if not self.blinks and elapsed > self.staring_time:
            self.image = self.images['blink']
            self.blinks = True
            self.start_time = now
        elif self.blinks and elapsed > self.blink_time:
            self.image = self.images['normal']
            self.blinks = False
            self.start_time = now

    def move_if_clear_path(self):
        """
        Checks whether the agent can continue moving to its destination
        on a clear x and y path. If clear, moves the agent closer to its destination.

        Returns True only when the agent was not yet within 0.5 of its
        destination on both axes AND the step taken did not report a wall
        collision; otherwise False.
        """
        #TODO: adjust math.isclose to also check for x and y board limit value?
        arrived = math.isclose(self.position.x, self.dest.x, rel_tol=1e-09, abs_tol=0.5) and \
                  math.isclose(self.position.y, self.dest.y, rel_tol=1e-09, abs_tol=0.5)
        if arrived:
            return False

        self.knowledge.set_direction()
        self.position += self.vel * self.game.dt
        # Resolve collisions one axis at a time, then sync the drawn rect.
        self.hit_rect.centerx = self.position.x
        walls_x = collide_with_walls(self, self.game.walls, 'x')
        self.hit_rect.centery = self.position.y
        walls_y = collide_with_walls(self, self.game.walls, 'y')
        self.rect.center = self.hit_rect.center

        return not (walls_x or walls_y)

    def _halt_for_input(self, key_name):
        """
        Record which key started listening, drop any queued actions and stop
        the agent at its current position.
        """
        self.key_used = key_name
        self.action_queue = []
        self.vel = vec(0, 0)
        self.dest = vec_dest(self.position.x, self.position.y)

    def listen(self):
        """
        Listens for a speech command, while either the 'SPACE' key or 'M' key is pressed.
        If given, command is stored in self.instruction property of the agent.

        SPACE uses r.recognize_google; M saves the audio through
        game.morgan_speech and uses its getTranscription(). When listening or
        recognition fails, the instruction is cleared and a random entry of
        silence_responses becomes the response.

        Returns the current self.instruction (unchanged when no key is pressed).
        """
        #UNCOMMENT FOR SPEECH VERSION
        keys = pg.key.get_pressed()
        if keys[pg.K_SPACE]:
            self._halt_for_input("SPACE")
            self.response = ''
            with sr.Microphone() as source:
                try:
                    audio = r.listen(source, timeout=5)
                    self.instruction = r.recognize_google(audio).lower()
                    printif("\nYou: " + str(self.instruction))
                # Exception (not a bare except) so Ctrl-C / SystemExit still
                # propagate while waiting on the microphone.
                except Exception:
                    self.instruction = ''
                    self.response = random.choice(self.silence_responses)
                    printif("\nYou: *silence*")
                    printif("(Hm? Can you please say that again?)")

        elif keys[pg.K_m]:
            self._halt_for_input("M")
            with sr.Microphone() as source:
                # call STT (speech to text) class to get the wav file to predict
                printif("listening...")
                try:
                    audio = r.listen(source, timeout=5)
                    self.game.morgan_speech.saveAudio(audio)
                    self.instruction = self.game.morgan_speech.getTranscription().lower()
                    printif("You: " + str(self.instruction))
                except Exception:
                    self.response = random.choice(self.silence_responses)
                    printif("Hm? Can you please say that again?")
                    self.instruction = ''

        # ## TEXT-ONLY INPUT
        # self.instruction = input("\nType something: ").lower()
        # attempt = self.attempt()
        # printif(self.name + ": " + str(attempt))

        return self.instruction

    def interpret(self):
        """
        The Agent processes the instruction (temporarily stored in self) into
        1) words from its lexicon and learned phrases
        2) a list of actions to carry out

        Learned phrases are matched first (as substrings of the instruction)
        and removed; the remaining whitespace-separated words are then looked
        up in the lexicon. Results are stored in self.recognized, self.actions
        and self.input_to_actions.

        Returns the tuple (recognized, actions).
        """
        recognized = []
        actions = []
        instruction = self.instruction # the input string from the user

        lexicon = self.knowledge.lexicon()
        learned = self.knowledge.learned()
        remaining = instruction

        # First check for learned phrases
        for phrase in learned:
            if phrase in instruction:
                printif("found the phrase: " + str(phrase))
                recognized.append(phrase)
                actions.append(learned[phrase])

                # Strip the phrase from the running remainder (not from the
                # original instruction) so that every matched phrase stays
                # removed and its words are not matched again below.
                remaining = remaining.replace(phrase, " ")

        # Then check for remaining recognized words in the lexicon
        for word in remaining.split():
            if word in lexicon:
                recognized.append(word)
                actions.append(lexicon[word])

        self.recognized = recognized
        self.actions = actions
        self.input_to_actions = list(zip(self.recognized, self.actions))

        printif("recognized: " + str(self.recognized) + "\n action list: " + str(self.actions))

        return (recognized, actions)

    def compose_actions(self, actions):
        """
        Composes the actions into a meaningful sequence.
        (Here is where semantics are helpful...:))
        Returns the composed action sequence (a list of integer lists) e.g. [[1],[0],[3]]

        actions: list of action lists, e.g. [[0, 1], [1]].
        """
        printif("composing actions... " + str(actions))
        # Restructure actions list into list of single actions e.g. [[1],[0],[3]].
        # Note that each action stays wrapped in its own list e.g. [1].
        single_actions = [[action] for action_list in actions for action in action_list]

        # Remove 'move' action if a destination action is already given.
        # TODO: have this make use of Action object type properties
        move = 0
        destinations = [1,2,3,4,7,9,10]
        if any(action[0] in destinations for action in single_actions):
            single_actions = [action for action in single_actions if action != [move]]

        # If 'me' and 'yes' action are both present, remove the 'me' action. (Patchy fix)
        me = 8
        yes = 5
        if [me] in single_actions and [yes] in single_actions:
            single_actions = [action for action in single_actions if action != [me]]

        printif("composed: " + str(single_actions))
        return single_actions

    def store_action_queue(self):
        """
        Stores the current parsed actions into the action queue.
        First composes actions into single list of actions e.g. from [[[0], [1]], [[1]]] to [[0],[1]]

        Returns the new action queue.
        """
        self.action_queue = self.compose_actions(self.actions)
        printif("stored action queue: " + str(self.action_queue))
        return self.action_queue

    def compose_feedback(self):
        """
        Composes feedback into input-based text response.
        TODO: Continue updating for improved input integration.

        For every (phrase, actions) pair in self.input_to_actions the response
        gets either the phrase itself (several actions or a multi-word phrase)
        or the text returned by the single action called with
        response_only=True. If nothing was recognized the agent asks
        "how do I <instruction>?".

        Returns the response, which is also stored in self.response.
        """
        pieces = []
        for phrase, actions in self.input_to_actions:
            if len(actions) > 1 or len(phrase.split()) > 1:
                # Learned (multi-action) or multi-word phrase: echo it back.
                pieces.append(phrase)
            else:
                # Action integrates user input in response.
                action = actions[0]
                pieces.append(self.knowledge.actions[action](response_only=True, phrase=phrase))

        # Every piece is followed by a single space, including the last one.
        responses = "".join(piece + " " for piece in pieces)

        if responses:
            self.response = responses
        else:
            # Agent responds to fully unfamiliar phrases by repeating instruction
            self.response = "how do I " + self.instruction + "?"

        printif(self.name + ": " + str(self.response))
        return self.response

    def attempt(self):
        """
        Attempts the first action in the queue.

        The action is paired with the transcript's current entry number so the
        same action can run again for a later instruction, while not being
        re-triggered on every call for the current one.
        """
        action = self.action_queue[0][0]
        action_info = [action, self.transcript.entry_number()] # allows repeated actions in new queues
        #TODO: make sure this allows for the same action twice
        if action_info != self.current_action: # keeps the agent from re-calling the current action
            self.current_action = action_info
            self.knowledge.actions[action]()

    def pop_action(self):
        """
        Pop first action from queue and return it.
        """
        popped = self.action_queue.pop(0)
        printif("popped: " + str(popped))

        if len(self.action_queue) == 0:
            printif("actions completed (" + str(self.action_queue) + ")")

        return popped

    def give_text_feedback(self):
        """
        Draw self.response as black text on a white box (twice the size of the
        rendered text) centred at (WIDTH/2, 450) on the game screen.
        """
        font = pg.font.Font(self.game.title_font, 15)
        textSurf = font.render(self.response, True, BLACK).convert_alpha()
        textSize = textSurf.get_size()
        bubbleSurf = pg.Surface((textSize[0] * 2., textSize[1] * 2))
        textRect = bubbleSurf.get_rect()
        bubbleSurf.fill(WHITE)
        # Centre the text inside the bubble before positioning the bubble.
        bubbleSurf.blit(textSurf, textSurf.get_rect(center=textRect.center))
        textRect.center = ((WIDTH/2), (450))
        self.game.screen.blit(bubbleSurf, textRect)

    def update(self):
        """
        Run one step of the listen / interpret / act cycle.

        NOTE(review): presumably called once per frame through the sprite
        group - confirm against the game loop.
        """
        self.listen()
        if self.instruction and not self.action_queue:
            printif("there is an instruction and no action queue yet")

            # Interpret instruction
            self.interpret()

            # Store action queue
            self.store_action_queue()

            # Compose feedback into response text
            self.compose_feedback()

            # Save to transcript (a copy, since the queue is consumed later)
            self.transcript.store(self.key_used, self.instruction, self.action_queue.copy(), self.response)

            # Reset instruction
            self.instruction = ""

        if self.action_queue:
            # Attempt action in queue
            self.attempt()

        self.blink()
        # The image may have changed in blink(), so rebuild the rect around it.
        self.rect = self.image.get_rect()
        self.rect.center = self.position

        still_moving = self.move_if_clear_path()

        # The current action counts as finished once the agent stops moving.
        if not still_moving and self.action_queue:
            printif("popping action now...")
            self.pop_action()

        # If task completed, save the task string to the transcript
        if self.game.goal_completed:
            self.transcript.store_success(self.game.goal_completed[0])

        self.transcript.save()
        
Exemple #33
0
 def __init__(self, address, chats):
     who = Transcript(address, chats)
     self.cell = SharedCell(who)
Exemple #34
0
    def is_read_through(self, txts, mm):
	"""Determines if event is read-through"""
	last_matched_block, last_matched_exon = self.last_matched()	
	for txt2 in txts:
	    if txt2.strand != self.txt.strand:
		continue
	    
	    if txt2.model != self.txt.model:
		continue
	    
	    if txt2.name == self.txt.name or txt2.alias == self.txt.alias:
		continue
			
	    if not overlap([self.align_coords[0][0], self.align_coords[-1][1]], [txt2.txStart, txt2.txEnd]) or\
	       overlap([self.txt.txStart, self.txt.txEnd], [txt2.txStart, txt2.txEnd]):
		continue
	    		
	    if overlap(last_matched_block, [txt2.txStart, txt2.txEnd]):
		continue
							
	    result = mm.match_exons(self.contig, txt2.full_name(), self.align_coords, txt2.exons, txt2.chrom, strand=txt2.strand)    	    	
	    if result and len(result.matched_blocks) == len(self.align_blocks):				
		exon_bounds_matched = True
		for i in range(len(result.matched_blocks)):		    		
		    # only 1 boundary has to be flush if it's terminal block
		    if i == len(self.align_blocks) - 1:
			if self.txt.txStart < txt2.txStart:
			    if self.align_coords[result.matched_blocks[i] - 1][0] != txt2.exons[result.matched_exons[i] - 1][0]:
				exon_bounds_matched = False
				
			else:
			    if self.align_coords[result.matched_blocks[i] - 1][1] != txt2.exons[result.matched_exons[i] - 1][1]:
				exon_bounds_matched = False
			
		    # both boundaries have to be flush if it's not terminal block
		    else:
			if not(self.align_coords[result.matched_blocks[i] - 1][0] == txt2.exons[result.matched_exons[i] - 1][0] and\
			       self.align_coords[result.matched_blocks[i] - 1][1] == txt2.exons[result.matched_exons[i] - 1][1]):
			    exon_bounds_matched = False
			    
		if not exon_bounds_matched:
		    continue
		
		if self.txt.txStart < txt2.txStart:
		    txt_span = [int(self.txt.txEnd) + 1, int(txt2.txStart) - 1]
		else:
		    txt_span = [int(txt2.txEnd) + 1, int(self.txt.txStart) - 1]
				
		# make sure there is no transcripts in between the 1st and 2nd transcripts
		has_txt_between = False
		for t in txts:
		    if t.name == self.txt.name or t.name == txt2.name:
			continue
				
		    if subsume([t.txStart, t.txEnd], txt_span):
			has_txt_between = True
			break
			
		    if not has_txt_between:			
			if self.txt.alias and txt2.alias and type(self.txt.alias) is str and type(txt2.alias) is str:
			    if not Transcript.same_family(self.txt.alias, txt2.alias):
				self.event_type = 'read-through'
				self.txt2 = txt2