def build_gene(elements, fasta=None, ref_genes=None):
    """Build a Gene, with one Transcript per exon combination, from *elements*.

    Args:
        elements: object exposing ``id``, ``chrm``, ``strand``, ``tss_exons``,
            ``internal_exons``, ``tes_exons``, ``se_transcripts``,
            ``introns``, ``promoter`` and ``polyas``.
        fasta: optional sequence source; when given, the gene's transcripts
            are replaced by the result of ``find_cds_for_gene``.
        ref_genes: optional reference genes; when given, the gene is passed
            through ``rename_transcripts``.

    Returns:
        The assembled Gene, or None when no transcript could be built.
    """
    # The gene bounds span every TSS exon, TES exon and single-exon transcript.
    boundary_regions = list(chain(
        elements.tss_exons, elements.tes_exons, elements.se_transcripts))
    gene_min = min(min(region) for region in boundary_regions)
    gene_max = max(max(region) for region in boundary_regions)

    transcripts = []
    exon_sets = build_transcripts_from_elements(
        elements.tss_exons, elements.internal_exons, elements.tes_exons,
        elements.se_transcripts, elements.introns, elements.strand)
    for i, exons in enumerate(exon_sets):
        transcript = Transcript(
            "%s_%i" % (elements.id, i), elements.chrm, elements.strand,
            exons, cds_region=None, gene_id=elements.id)
        transcript.promoter = find_matching_promoter_for_transcript(
            transcript, elements.promoter)
        transcript.polya_region = find_matching_polya_region_for_transcript(
            transcript, elements.polyas)
        transcripts.append(transcript)

    if not transcripts:
        return None

    gene = Gene(elements.id, elements.id, elements.chrm, elements.strand,
                gene_min, gene_max, transcripts)

    # Identity checks: an argument that overrides comparison operators must
    # not be mistaken for (or compared element-wise against) None.
    if fasta is not None:
        gene.transcripts = find_cds_for_gene(
            gene, fasta, only_longest_orf=True)
    if ref_genes is not None:
        gene = rename_transcripts(gene, ref_genes)
    return gene
def load_assembled_transcripts(filename, ref_genome):
    """Load transcripts and their exons from a tab-separated annotation file.

    "transcript" rows start a new Transcript (only when the chromosome is a
    key of ``ref_genome.chromosomes_dict``); following "exon" rows are added
    to the transcript currently being built.

    Args:
        filename: path of the annotation file to read.
        ref_genome: object whose ``chromosomes_dict`` maps chromosome name to
            a sliceable sequence.

    Returns:
        List of Transcript objects in file order.
    """
    list_transcripts = []
    itranscript = None  # transcript currently being assembled, if any

    with open(filename) as datafile:
        for line in datafile:
            column = line.split('\t')
            if len(column) < 9:
                # Comment, blank or truncated line: nothing to parse.
                continue

            if column[2] == "transcript":
                # A new transcript row closes the previous transcript.
                if itranscript is not None:
                    list_transcripts.append(itranscript)
                    itranscript = None  # never append the same one twice

                transcript_info = column[8].split(' ')
                transcript_id = transcript_info[3].strip()
                gene_id = transcript_info[1].strip()
                chromosome = column[0]
                if chromosome in ref_genome.chromosomes_dict:
                    fpkm = transcript_info[7].strip(";").strip("\"")
                    sign = column[6].strip()
                    itranscript = Transcript(
                        transcript_id, gene_id, chromosome, fpkm, sign)

            elif column[2] == "exon":
                if column[0] not in ref_genome.chromosomes_dict:
                    continue
                transcript_info = column[8].split(' ')
                exon_start = int(column[3])
                exon_end = int(column[4])
                current_id = None if itranscript is None else itranscript.id
                if itranscript is not None and current_id == transcript_info[3]:
                    sequence = ref_genome.chromosomes_dict[
                        itranscript.chromosome][exon_start - 1:exon_end]
                    itranscript.add_exon([exon_start, exon_end], sequence)
                else:
                    # Single-argument print behaves the same on Python 2 and 3.
                    print("WARNING  %s %s" % (transcript_info, current_id))

    # The last transcript has no following "transcript" row to flush it.
    if itranscript is not None:
        list_transcripts.append(itranscript)
    return list_transcripts
def writeTranscriptOutput(transcripts, outSam, outFa, genome):
    """Write every transcript to the SAM and FASTA output handles.

    For each entry of the *transcripts* dict the key is printed, then the
    transcript's ``Transcript.printableSAM`` text is written to *outSam* and
    its ``Transcript.printableFa`` text to *outFa*, each followed by a
    newline.
    """
    for transcriptID, record in transcripts.items():
        print(transcriptID)
        samText = Transcript.printableSAM(record, genome)
        outSam.write(samText + "\n")
        faText = Transcript.printableFa(record)
        outFa.write(faText + "\n")
    return
def compareGTFs(truthGTF, compGTF):
    """Parse two GTF files into transcripts and hand them to ``compareAll``.

    Both files are parsed the same way: "transcript" rows create a Transcript
    keyed by transcript id (with coverage taken from the ``cov`` attribute),
    and "exon" rows append a ``(start, end)`` tuple to that transcript's
    ``exons``.

    Args:
        truthGTF: path of the reference GTF.
        compGTF: path of the GTF being evaluated.
    """

    def _attribute(attributes, key):
        """Return the first double-quoted value found after *key*."""
        keyIndex = attributes.find(key)
        valueStart = attributes.find('"', keyIndex) + 1
        valueEnd = attributes.find('"', valueStart)
        return attributes[valueStart:valueEnd]

    def _loadTranscripts(gtfPath):
        """Return the Transcript objects described by the GTF at *gtfPath*."""
        transcripts = dict()
        with open(gtfPath, 'r') as tsv:
            for line in tsv:
                row = line.strip().split('\t')
                # The attribute column is row[8], so 9 columns are required.
                if len(row) < 9:
                    continue

                cov = float(_attribute(row[8], 'cov'))
                transcriptId = _attribute(row[8], 'transcript_id')

                if row[2] == 'transcript':
                    transcripts[transcriptId] = Transcript(
                        row[0], int(row[3]), int(row[4]), cov, transcriptId)
                elif row[2] == 'exon':
                    transcripts[transcriptId].exons.append(
                        (int(row[3]), int(row[4])))
        return transcripts.values()

    transcriptsTruth = _loadTranscripts(truthGTF)
    transcriptsComp = _loadTranscripts(compGTF)
    compareAll(transcriptsTruth, transcriptsComp)
def check_level(self, line):
    """Adopt *line* as the transcript when its 'level' is numerically lower.

    Returns True (and sets ``self.type`` to 'level') when the stored
    transcript was replaced, False otherwise.
    """
    candidate_level = int(line.attrs['level'])
    current_level = int(self.transcript.attrs['level'])
    if candidate_level >= current_level:
        return False

    self.transcript = Transcript(line)
    self.type = 'level'
    return True
def check_length(self, line):
    """Adopt the transcript built from *line* when it is strictly longer.

    Returns True (and sets ``self.type`` to 'length') when the stored
    transcript was replaced, False otherwise.
    """
    candidate = Transcript(line)
    is_longer = candidate.length > self.transcript.length
    if not is_longer:
        return False

    self.type = 'length'
    self.transcript = candidate
    return True
def main():
    """Build the black transcription window and run the tkinter main loop."""
    root = tkinter.Tk()
    root.configure(background="black")

    text_view = tkinter.Text(root, font=('Tiresias', 21))
    text_view.configure(background='black')
    # Two text tags with different foreground colours.
    for tag_name, colour in (('unstable', 'gray'), ('stable', 'white')):
        text_view.tag_config(tag_name, foreground=colour)

    language_code = 'nl-NL'  # a BCP-47 language tag
    model = Transcript()
    service = TranscriptionService(language_code, model)

    def close_window():
        # Stop the transcription service before tearing the window down.
        service.stop()
        root.destroy()

    root.protocol("WM_DELETE_WINDOW", close_window)
    # First UI refresh is scheduled 50 ms from now.
    text_view.after(50, updateUI, text_view, model)
    text_view.pack()

    stop_button = tkinter.Button(root)
    # NOTE(review): the Switch instance is only bound to a local name;
    # presumably that keeps it alive for the lifetime of the window.
    button_decoration = Switch(stop_button, service)
    stop_button.pack(fill=tkinter.X)

    root.mainloop()
def homework_html_to_LaTeX(file_in, soln=False):
    """Convert a homework HTML file into a LaTeX text file.

    The output name is derived from the tenth '/'-separated component of
    *file_in* (minus its last eight characters) plus a suffix that depends
    on *soln*. The module-level ``week_number`` is set to the part of that
    name before 'HTML'.

    Args:
        file_in: path of the homework HTML file.
        soln: when true the "with solutions" file name is used and the flag
            is forwarded to ``transcribe_problems``.
    """
    global week_number

    the_homework = Transcript(file_in)
    soup = BeautifulSoup(the_homework.text, 'html.parser')
    print('souped up!')

    suffix = "LaTeXwithsoln.txt" if soln else "LaTeXnosoln.txt"
    file_name = file_in.strip().split('/')[9][0:-8] + suffix
    output_dir = ('C:/Users/Justin Yan/Documents/Development/Python/'
                  'AoPSCleanScript/AoPSCleanScript/homework_LaTeX/')

    # The context manager guarantees the file is flushed and closed even if
    # one of the transcribe_* steps raises.
    with open(output_dir + file_name, 'w') as file_out:
        print('File opened for writing')
        week_number = file_name.split('HTML')[0]
        transcribe_preamble(soup, file_out)
        # process the problem body
        transcribe_problems(soup, file_out, soln)
        # Raw string: '\e' is not a valid escape sequence.
        file_out.write(r'\end{document}')
def parsePro(filename):
    '''
        Return a dictionary mapping transcript id (e.g. 0300689) to a
        Transcript built from the locus and expression fraction of its row.

        Rows whose fraction (9th column) is not above the threshold, and rows
        with fewer than nine columns, are skipped.
    '''
    threshold = 0.00005

    transcripts = dict()
    with open(filename, 'r') as f:
        for line in f:
            row = line.strip().split('\t')
            # row[8] is read below, so at least nine columns are required.
            if len(row) < 9:
                continue

            tag = row[1]

            # The locus column looks like "<chrom>:<start>-<end>W".
            locus = row[0]
            sep1 = locus.find(':')
            sep2 = locus.find('-', sep1)
            sep3 = locus.find('W', sep2)
            chrom = locus[:sep1]
            start = int(locus[sep1 + 1:sep2])
            end = int(locus[sep2 + 1:sep3])

            fraction = float(row[8])
            if fraction > threshold:
                transcripts[tag] = Transcript(chrom, start, end, fraction, tag)
    return transcripts
def processSAM(sam, genome):
    """Read a SAM file into its header text and two transcript dicts.

    Header lines (starting with "@") are concatenated, newline-terminated,
    into the returned header string. Every other line becomes a Transcript;
    those with FLAG above 16 (treated as multimapping) or CHROM "*"
    (unmapped) are dropped. The rest are keyed by QNAME.

    Returns:
        (header, canTranscripts, noncanTranscripts)
    """
    headerLines = []
    canTranscripts = {}
    noncanTranscripts = {}

    with open(sam, 'r') as samFile:
        for rawLine in samFile:
            record = rawLine.strip()
            if record.startswith("@"):
                headerLines.append(record + "\n")
                continue

            t = Transcript(record, genome)

            # Filter out transcripts that are multimapping
            if int(t.FLAG) > 16:
                continue
            # Skip unmapped transcripts altogether
            if t.CHROM == "*":
                continue

            if t.isCanonical == True:
                target = canTranscripts
            else:
                target = noncanTranscripts
            target[t.QNAME] = t

    header = "".join(headerLines)
    return header, canTranscripts, noncanTranscripts
def compareGTFs(truthGTF, compGTF):
    """Parse two GTF files into unit-coverage transcripts and compare them.

    "transcript" rows create a Transcript keyed by transcript id with
    coverage 1; "exon" rows append ``(start, end)`` to an already-known
    transcript and are ignored otherwise. The results are passed to
    ``compareAll``.

    Args:
        truthGTF: path of the reference GTF (parsed with progress output).
        compGTF: path of the GTF being evaluated.
    """

    def _loadTranscripts(gtfPath, verbose):
        """Return the Transcript objects described by the GTF at *gtfPath*."""
        transcripts = dict()
        with open(gtfPath, 'r') as tsv:
            for line in tsv:
                row = line.strip().split('\t')
                # The attribute column is row[8], so 9 columns are required.
                if len(row) < 9:
                    continue

                attributes = row[8]
                transcriptIdIndex = attributes.find('transcript_id')
                transcriptIdStart = attributes.find('"', transcriptIdIndex) + 1
                transcriptIdEnd = attributes.find('"', transcriptIdStart)
                transcriptId = attributes[transcriptIdStart:transcriptIdEnd]

                if verbose:
                    print(row[2])

                if row[2] == 'transcript':
                    if verbose:
                        print('Found transcript ' + str(transcriptId))
                    transcripts[transcriptId] = Transcript(
                        row[0], int(row[3]), int(row[4]), 1, transcriptId)
                elif row[2] == 'exon' and transcriptId in transcripts:
                    transcripts[transcriptId].exons.append(
                        (int(row[3]), int(row[4])))
        return transcripts.values()

    # Only the truth file is parsed with progress output.
    transcriptsTruth = _loadTranscripts(truthGTF, True)
    transcriptsComp = _loadTranscripts(compGTF, False)
    compareAll(transcriptsTruth, transcriptsComp)
def extract_features(gtfs, feature_types=('exon', 'junction')):
    """Union the features of each requested type across several GTFs.

    Returns a dict mapping every name in *feature_types* to the union of
    ``Transcript.extract_features(gtf)[name]`` over all *gtfs*.
    """
    annotated_features = dict((kind, Set()) for kind in feature_types)

    for gtf in gtfs:
        gtf_features = Transcript.extract_features(gtf)
        for kind in feature_types:
            merged = annotated_features[kind].union(gtf_features[kind])
            annotated_features[kind] = merged

    return annotated_features
def extract_features(gtfs, feature_types=('exon', 'junction')):
    """Collect, per feature type, the union of features found in *gtfs*.

    Each GTF is passed to ``Transcript.extract_features``; the entry for
    every name in *feature_types* is merged into the running set for that
    name. The dict of merged sets is returned.
    """
    collected = {}
    for name in feature_types:
        collected[name] = Set()

    per_gtf = (Transcript.extract_features(gtf) for gtf in gtfs)
    for found in per_gtf:
        for name in feature_types:
            collected[name] = collected[name].union(found[name])

    return collected
def build_gene(elements, fasta=None, ref_genes=None):
    """Build a Gene, with one Transcript per exon combination, from *elements*.

    Args:
        elements: object exposing ``id``, ``chrm``, ``strand``, ``tss_exons``,
            ``internal_exons``, ``tes_exons``, ``se_transcripts``,
            ``introns``, ``promoter`` and ``polyas``.
        fasta: optional sequence source; when given, the gene's transcripts
            are replaced by the result of ``find_cds_for_gene``.
        ref_genes: optional reference genes; when given, the gene is passed
            through ``rename_transcripts``.

    Returns:
        The assembled Gene, or None when no transcript could be built.
    """
    # The gene bounds span every TSS exon, TES exon and single-exon transcript.
    boundary_regions = list(chain(
        elements.tss_exons, elements.tes_exons, elements.se_transcripts))
    gene_min = min(min(region) for region in boundary_regions)
    gene_max = max(max(region) for region in boundary_regions)

    transcripts = []
    exon_sets = build_transcripts_from_elements(
        elements.tss_exons, elements.internal_exons, elements.tes_exons,
        elements.se_transcripts, elements.introns, elements.strand)
    for i, exons in enumerate(exon_sets):
        transcript = Transcript(
            "%s_%i" % (elements.id, i), elements.chrm, elements.strand,
            exons, cds_region=None, gene_id=elements.id)
        transcript.promoter = find_matching_promoter_for_transcript(
            transcript, elements.promoter)
        transcript.polya_region = find_matching_polya_region_for_transcript(
            transcript, elements.polyas)
        transcripts.append(transcript)

    if not transcripts:
        return None

    gene = Gene(elements.id, elements.id, elements.chrm, elements.strand,
                gene_min, gene_max, transcripts)

    # Identity checks: an argument that overrides comparison operators must
    # not be mistaken for (or compared element-wise against) None.
    if fasta is not None:
        gene.transcripts = find_cds_for_gene(
            gene, fasta, only_longest_orf=True)
    if ref_genes is not None:
        gene = rename_transcripts(gene, ref_genes)
    return gene
def use_the_non_NA_transcript_supported(self, line):
    """Settle the comparison when either support level is 'NA'.

    Returns True when *line* has support level 'NA' (stored transcript
    kept), or when only the stored transcript is 'NA' (it is then replaced
    by *line* and ``self.type`` set). Returns False when neither is 'NA'.
    """
    support_key = 'transcript_support_level'

    if line.attrs[support_key] == 'NA':
        return True
    if self.transcript.attrs[support_key] != 'NA':
        return False

    self.transcript = Transcript(line)
    self.type = 'transcript_support_level'
    return True
def check_first_transcript(self, line):
    """Classify the first transcript seen, storing it only if tagged CCDS.

    Without "CCDS" in the line's tags, ``self.type`` becomes
    'one_rejected_transcript' and nothing is stored. Otherwise the line is
    stored as the transcript and ``self.type`` is 'MANE_Select' or
    'only_transcript' depending on the stored transcript's tags.
    """
    # check that it's a member of the consensus CDS gene set
    if "CCDS" not in line.attrs['tags']:
        self.type = 'one_rejected_transcript'
        return

    self.transcript = Transcript(line)
    # does the transcript belong to the MANE Select data set?
    in_mane_select = 'MANE_Select' in self.transcript.attrs['tags']
    self.type = 'MANE_Select' if in_mane_select else 'only_transcript'
def check_MANE_dataset(self, line):
    """Prefer whichever transcript carries the 'MANE_Select' tag.

    The stored transcript wins when it has the tag; otherwise *line*
    replaces it when *line* has the tag. In both cases ``self.type`` is set
    to 'MANE_Select' and True is returned. Returns False when neither has
    the tag.
    """
    mane_tag = 'MANE_Select'

    if mane_tag in self.transcript.attrs['tags']:
        self.type = 'MANE_Select'
        return True
    if mane_tag not in line.attrs['tags']:
        return False

    self.transcript = Transcript(line)
    self.type = 'MANE_Select'
    return True
def check_CCDS(self, line):
    """Prefer the transcript tagged "CCDS" when exactly one of the two is.

    Returns True when the tag settles the choice: *line* replaces the stored
    transcript (``self.type`` becomes 'CCDS') if only *line* is tagged, and
    the stored transcript is kept if only it is tagged. Returns False when
    both or neither carry the tag.
    """
    candidate_tagged = "CCDS" in line.attrs['tags']
    current_tagged = "CCDS" in self.transcript.attrs['tags']

    if candidate_tagged == current_tagged:
        return False

    if candidate_tagged:
        self.transcript = Transcript(line)
        self.type = 'CCDS'
    return True
def check_support_level(self, line):
    """Adopt *line* when its transcript support level is numerically lower.

    Both support levels are converted with ``int``. Returns True (and sets
    ``self.type`` to 'transcript_support_level') when the stored transcript
    was replaced, False otherwise.
    """
    candidate_level = int(line.attrs['transcript_support_level'])
    current_level = int(self.transcript.attrs['transcript_support_level'])

    if candidate_level >= current_level:
        return False

    self.transcript = Transcript(line)
    self.type = 'transcript_support_level'
    return True
def compareGTFs(proFile, truthGTF, compGTF):
    """Compare assembled transcripts against flux-simulated truth.

    The truth transcripts come from ``parsePro(proFile)``; their exons are
    filled in from the "exon" rows of *truthGTF*. The comparison transcripts
    are read from *compGTF* ("transcript" rows create them, with coverage
    from the ``cov`` attribute; "exon" rows add exons). Both collections are
    passed to ``compareAll``.

    Args:
        proFile: path of the .pro file output by flux.
        truthGTF: path of the GTF annotating the true transcripts.
        compGTF: path of the GTF being evaluated.
    """

    def _attribute(attributes, key):
        """Return the first double-quoted value found after *key*."""
        keyIndex = attributes.find(key)
        valueStart = attributes.find('"', keyIndex) + 1
        valueEnd = attributes.find('"', valueStart)
        return attributes[valueStart:valueEnd]

    # file 1 is a .pro file output by flux
    transcriptsTruth = parsePro(proFile)

    with open(truthGTF, 'r') as tsv:
        for line in tsv:
            row = line.strip().split('\t')
            # The attribute column is row[8], so 9 columns are required.
            if len(row) < 9:
                continue

            transcriptId = _attribute(row[8], 'transcript_id')
            # Only exons of transcripts kept by parsePro are recorded.
            if row[2] == 'exon' and transcriptId in transcriptsTruth:
                transcriptsTruth[transcriptId].exons.append(
                    (int(row[3]), int(row[4])))
    transcriptsTruth = transcriptsTruth.values()

    transcriptsComp = dict()
    with open(compGTF, 'r') as tsv:
        for line in tsv:
            row = line.strip().split('\t')
            if len(row) < 9:
                continue

            cov = float(_attribute(row[8], 'cov'))
            transcriptId = _attribute(row[8], 'transcript_id')

            if row[2] == 'transcript':
                transcriptsComp[transcriptId] = Transcript(
                    row[0], int(row[3]), int(row[4]), cov, transcriptId)
            elif row[2] == 'exon':
                transcriptsComp[transcriptId].exons.append(
                    (int(row[3]), int(row[4])))
    transcriptsComp = transcriptsComp.values()

    compareAll(transcriptsTruth, transcriptsComp)
def __init__(self, game, x, y):
    """Create the sprite at (x, y), register it with the game and reset state.

    Loads the three apple images from ``game.img_folder``, initialises the
    blink timer, the position/velocity vectors, the knowledge and transcript
    helpers and the working-memory fields.
    """
    self.groups = game.all_sprites
    pg.sprite.Sprite.__init__(self, self.groups)
    self.game = game

    # Image state name -> file name inside game.img_folder.
    sprite_files = (('normal', "apple_64px.png"),
                    ('blink', "apple_64px_blink.png"),
                    ('wink', "apple_64px_wink.png"))
    loaded = {}
    for state, file_name in sprite_files:
        sprite_path = path.join(game.img_folder, file_name)
        loaded[state] = pg.image.load(sprite_path).convert_alpha()
    self.images = loaded

    # Blink timing (seconds).
    self.blinks = False
    self.blink_time = .25
    self.staring_time = 3
    self.start_time = time.time()

    # Geometry; hit_rect is the same Rect object as rect.
    self.image = self.images['normal']
    self.rect = self.image.get_rect()
    self.rect.center = (x, y)
    self.hit_rect = self.rect
    self.hit_rect.center = self.rect.center
    self.vel = vec(0, 0)
    self.position = vec(x, y)
    self.dest = vec_dest(x, y)
    self.previous_pos = vec_prev(x, y)

    self.instruction = ""
    self.orientation = "front"  # left, right, front, back
    self.name = "Young Apple"
    self.silence_responses = [
        "can you please say that again?",
        "oops, I missed that. say again?",
        "I heard *silence*",
        "repeat again, please?",
        "could you say again?",
        "I didn't hear that, try again?",
        "I heard *silence*",
    ]
    self.knowledge = Knowledge(self)
    self.transcript = Transcript()

    # Working memory properties
    self.recognized = []
    self.actions = []  # current, complete list of action sequences e.g. [[1],[[0],[2]]]
    self.input_to_actions = []
    self.action_queue = []  # remaining actions to be completed
    self.current_action = []
    self.key_used = ""
    self.response = ""
def cleanNoncanonical(transcripts, annotatedJunctions, genome):
    """Try to rescue noncanonical splice junction bounds near annotated ones.

    Every noncanonical intron bound of every transcript is written to a
    temporary BED file, sorted, and matched to its closest annotated
    junction with ``pybedtools``. Bounds within 5 bp of an annotated
    junction are passed to ``rescueNoncanonicalJunction``; the others are
    left untouched.

    Args:
        transcripts: dict of transcript id -> transcript.
        annotatedJunctions: junction set accepted by ``BedTool.closest``.
        genome: forwarded to ``rescueNoncanonicalJunction``.
    """
    # The context manager guarantees the BED file is flushed and closed
    # before the external sort reads it.
    with open("tmp_nc.bed", 'w') as o:
        for t in transcripts.values():
            bounds = Transcript.getAllIntronBounds(t)
            for b in bounds:
                if b.isCanonical == True:
                    continue
                # BED record for this junction bound
                pos = IntronBound.getBED(b)
                o.write(pos + "\n")

    os.system('sort -k1,1 -k2,2n tmp_nc.bed > sorted_tmp_nc.bed')
    nc = pybedtools.BedTool("sorted_tmp_nc.bed")
    # The result is materialised as text before the temp files are removed.
    jnMatches = str(nc.closest(annotatedJunctions, s=True, D="ref",
                               t="first")).split("\n")

    os.system("rm tmp_nc.bed")
    os.system("rm sorted_tmp_nc.bed")
    # NOTE(review): tmp.bed and tmp2.bed are not created in this function;
    # presumably they are left behind by an earlier step -- confirm.
    os.system("rm tmp.bed")
    os.system("rm tmp2.bed")

    # Iterate over splice junction boundaries and their closest canonical match.
    for match in jnMatches:
        if len(match) == 0:
            continue
        match = match.split('\t')
        d = int(match[-1])
        transcriptID, spliceJnNum, side = match[3].split("__")

        # Only attempt to rescue junction boundaries that are within 5 bp of
        # an annotated junction
        if abs(d) > 5:
            continue

        currTranscript = transcripts[transcriptID]
        currJunction = currTranscript.spliceJunctions[int(spliceJnNum)]
        currIntronBound = currJunction.bounds[int(side)]
        rescueNoncanonicalJunction(currTranscript, currJunction,
                                   currIntronBound, d, genome)
    return
def build_merged_transcript(gene_id, clustered_transcripts):
    """Merge transcripts that share internal structure into one Transcript.

    The merged transcript takes the exons of the first clustered transcript,
    with the first exon's start and last exon's stop widened to the extremes
    of the cluster, and the union span of the non-None promoters and polyA
    regions.

    Args:
        gene_id: id of the owning gene; also the prefix of the new id.
        clustered_transcripts: non-empty sequence of transcripts whose
            ``IB_key()`` values are all equal.

    Returns:
        The new merged Transcript (with a random, temporary id).
    """
    # find the transcript bounds
    start, stop = 1e20, 0
    for transcript in clustered_transcripts:
        start = min(start, transcript.exons[0][0])
        stop = max(stop, transcript.exons[-1][-1])

    # merge the promoters; min()/max() raise ValueError when none is set
    try:
        new_promoter = (
            min(t.promoter[0] for t in clustered_transcripts
                if t.promoter is not None),
            max(t.promoter[1] for t in clustered_transcripts
                if t.promoter is not None))
    except ValueError:
        new_promoter = None

    # merge the polyas
    try:
        new_polya = (
            min(t.polya_region[0] for t in clustered_transcripts
                if t.polya_region is not None),
            max(t.polya_region[1] for t in clustered_transcripts
                if t.polya_region is not None))
    except ValueError:
        new_polya = None

    # choose a template transcript, and make sure that all of the
    # clustered transcripts have the same internal structure (
    # this should be guaranteed by the calling function )
    bt = clustered_transcripts[0]
    assert all(t.IB_key() == bt.IB_key() for t in clustered_transcripts)

    new_exons = list(bt.exons)
    new_exons[0] = (start, new_exons[0][1])
    new_exons[-1] = (new_exons[-1][0], stop)

    # choose a random id - this should be renamed in the next step.
    # randrange() requires an integer bound; a float such as 1e9 is rejected
    # by recent Python versions.
    new_trans_id = gene_id + "_RNDM_%i" % random.randrange(10 ** 9)
    new_transcript = Transcript(new_trans_id, bt.chrm, bt.strand,
                                new_exons, bt.cds_region, gene_id,
                                name=bt.name, gene_name=bt.gene_name,
                                promoter=new_promoter,
                                polya_region=new_polya)
    return new_transcript
def lect_to_TeX(args):
    """Convert a lecture transcript HTML file into a LaTeX document.

    Args:
        args: object with ``file_in`` (HTML path), ``file_out`` and
            ``file_name`` (concatenated to form the output path) and
            ``image_out`` (image path handed to the transcribe helpers).
    """
    # file to be read
    file_in = args.file_in
    file_out = args.file_out
    image_path = args.image_out
    file_name = args.file_name

    # instantiate Transcript object and read its HTML text
    the_transcript = Transcript(file_in)
    transcript_text = the_transcript.text

    # BeautifulSoup object allows easier traverse of HTML text
    soup = BeautifulSoup(transcript_text, 'html.parser')

    # The context manager guarantees the output is flushed and closed even
    # if one of the transcribe_* steps raises.
    with open(file_out + file_name, 'w') as O:
        transcribe_preamble(soup, O, image_path)
        transcribe_msgs(soup, O, image_path)
        O.write(r'\end{document}')


# NOTE(review): assumed to be a module-level counter used by code outside
# this function -- confirm it was not meant to live inside lect_to_TeX.
counter = 0
def cli():
    """Run the interactive GPA calculator loop.

    Repeatedly asks for a transcript CSV path, optionally writes the GPA
    report to a JSON or YAML file, dumps the report to stdout as YAML, and
    stops when the user declines to continue.
    """
    banner = ('###########################\n'
              '# GPA Calculator v0.3 #\n'
              '# Developed by Daanish KS #\n'
              '###########################\n')
    print(banner)

    affirmative = {'Y', 'y', 'YES', 'Yes', 'yes'}
    negative = {'N', 'n', 'NO', 'No', 'no'}
    # Menu choice -> report file written for it.
    report_files = {'1': 'gpa_report.json', '2': 'gpa_report.yaml'}

    session = PromptSession()  # Enables file path history for convenience

    while True:
        csv_file = session.prompt('Transcript CSV file path: ',
                                  completer=file_completion(),
                                  validator=file_validation(),
                                  validate_while_typing=True)
        record = Transcript(csv_file)

        file_request = prompt('Write GPA report to file [y/n]? ',
                              validator=yes_no_validation(),
                              validate_while_typing=True)
        if file_request in affirmative:
            report_type = prompt('JSON [1] or YAML [2]? ',
                                 validator=report_type_validation(),
                                 validate_while_typing=True)
            if report_type in report_files:
                record.gpa_report_to_file(file_path=report_files[report_type])

        print()
        yaml.dump(record.gpa_report(round_place=3), sys.stdout)
        print()

        repeat_request = prompt('Continue [y/n]? ',
                                validator=yes_no_validation(),
                                validate_while_typing=True)
        if repeat_request in negative:
            break
        print('\n-------------------------\n')
def transcript(self):
    """Open the transcript URL in the browser and parse the returned page."""
    page = self.browser.open(urls['transcript'])
    parsed = Transcript.from_html(page)
    return parsed
def test_raises_no_mapped_segments(alignments):
    """Constructing a Transcript from these alignments must fail."""
    constructor_args = (alignments, DEFAULT_SKIP, DEFAULT_MAP)
    with pytest.raises(NoMappedSegmentsError):
        Transcript(*constructor_args)
def test_pre_mRNA_only(flna_annotations, args):
    """The transcript must be annotated as 'pre-mRNA' and nothing else."""
    transcript = Transcript(*args['transcript_args'])
    tolerance = args['junction_tolerance']
    found = flna_annotations.get_annotations(transcript, tolerance)
    assert found == ['pre-mRNA']
def test_NM_001456_only(flna_annotations, args):
    """The transcript must be annotated as 'NM_001456' and nothing else."""
    transcript = Transcript(*args['transcript_args'])
    tolerance = args['junction_tolerance']
    found = flna_annotations.get_annotations(transcript, tolerance)
    assert found == ['NM_001456']
def __init__(self, expression=None, active=False, half_life=False,
             *args, **kwargs):
    """Initialise the base Transcript, then record the expression state.

    Remaining positional and keyword arguments are forwarded unchanged to
    ``Transcript.__init__``.
    """
    Transcript.__init__(self, *args, **kwargs)
    # Plain attribute stores; no validation is applied to the values.
    self.expression, self.active, self.half_life = (
        expression, active, half_life)
import tableproxy
from transcript import Transcript

__all__ = ['Transcript']

# Re-export Transcript wrapped by the table proxy.
Transcript = tableproxy.getProxy(Transcript)

if __name__ == "__main__":
    # Small demo: a first row of ten (None, None) pairs followed by two
    # data rows.
    first_row = [(None, None)] * 10
    data_rows = [
        ['sw', 0, 1.23, 2.01, 'A', 'male', 'native', 'how are you',
         1, 1, 1, 'report'],
        ['sw', 0, 2.01, 2.53, 'B', 'female', 'native', "I'm fine",
         1, 1, 2, 'report'],
    ]
    table = Transcript.importList([first_row] + data_rows)
    table.printTable()
class Agent(pg.sprite.Sprite):
    """Apple sprite that listens for instructions and acts them out.

    Each frame (``update``) the agent listens for input, turns a new
    instruction into a queue of actions using its ``Knowledge``, composes a
    text response, records the exchange in its ``Transcript`` and works
    through the action queue while moving towards its destination.
    """

    def __init__(self, game, x, y):
        """Create the sprite at (x, y), register it with the game and reset state."""
        self.groups = game.all_sprites
        pg.sprite.Sprite.__init__(self, self.groups)
        self.game = game
        self.images = {
            'normal': pg.image.load(path.join(game.img_folder, "apple_64px.png")).convert_alpha(),
            'blink': pg.image.load(path.join(game.img_folder, "apple_64px_blink.png")).convert_alpha(),
            'wink': pg.image.load(path.join(game.img_folder, "apple_64px_wink.png")).convert_alpha(),
        }
        self.blinks = False
        self.blink_time = .25
        self.staring_time = 3
        self.start_time = time.time()
        self.image = self.images['normal']
        self.rect = self.image.get_rect()
        self.rect.center = (x, y)
        # hit_rect is the same Rect object as rect.
        self.hit_rect = self.rect
        self.hit_rect.center = self.rect.center
        self.vel = vec(0, 0)
        self.position = vec(x, y)
        self.dest = vec_dest(x, y)
        self.previous_pos = vec_prev(x, y)
        self.instruction = ""
        self.orientation = "front"  # left, right, front, back
        self.name = "Young Apple"
        self.silence_responses = ["can you please say that again?",
                                  "oops, I missed that. say again?",
                                  "I heard *silence*",
                                  "repeat again, please?",
                                  "could you say again?",
                                  "I didn't hear that, try again?",
                                  "I heard *silence*"]
        self.knowledge = Knowledge(self)
        self.transcript = Transcript()

        # Working memory properties
        self.recognized = []
        self.actions = []  # current, complete list of action sequences e.g. [[1],[[0],[2]]]
        self.input_to_actions = []
        self.action_queue = []  # remaining actions to be completed
        self.current_action = []
        self.key_used = ""
        self.response = ""

    def turn(self, direction):
        """
        change the orientation of the agent to a different direction
        """
        # Not implemented yet.
        pass

    def give_name(self, new_name):
        """Rename the agent and map the new name to the meaning of "you"."""
        self.name = new_name
        mapped_meaning = self.knowledge.lexicon()["you"]
        self.knowledge.add_to_lexicon(new_name, mapped_meaning)

    def blink(self):
        """
        Changes the apple's image to make the agent blink.
        """
        end_time = time.time()
        elapsed = end_time - self.start_time

        if not self.blinks and elapsed > self.staring_time:
            # Eyes have been open long enough: close them.
            self.image = self.images['blink']
            self.blinks = True
            self.start_time = end_time
        elif self.blinks and elapsed > self.blink_time:
            # Blink is over: open the eyes again.
            self.image = self.images['normal']
            self.blinks = False
            self.start_time = end_time

    def move_if_clear_path(self):
        """
        Checks whether the agent can continue moving to its destination
        on a clear x and y path. If clear, moves the agent closer to its
        destination.

        Returns True while the agent is still away from its destination and
        did not collide with a wall during this step.
        """
        # TODO: adjust math.isclose to also check for x and y board limit value?
        clear_path = not math.isclose(self.position.x, self.dest.x, rel_tol=1e-09, abs_tol=0.5) or \
                     not math.isclose(self.position.y, self.dest.y, rel_tol=1e-09, abs_tol=0.5)
        no_walls = True
        if clear_path:
            self.knowledge.set_direction()
            self.position += self.vel * self.game.dt
            # Resolve collisions one axis at a time.
            self.hit_rect.centerx = self.position.x
            walls_x = collide_with_walls(self, self.game.walls, 'x')
            self.hit_rect.centery = self.position.y
            walls_y = collide_with_walls(self, self.game.walls, 'y')
            self.rect.center = self.hit_rect.center
            if walls_x or walls_y:
                no_walls = False
        return clear_path and no_walls

    def listen(self):
        '''
        Listens for a speech command, while either the 'SPACE' key or 'M' key is pressed.
        If given, command is stored in self.instruction property of the agent.

        Returns the current instruction (empty when nothing was understood).
        '''
        keys = pg.key.get_pressed()
        if keys[pg.K_SPACE]:
            self.key_used = "SPACE"
            self.action_queue = []
            self.response = ''
            self.vel = vec(0, 0)
            self.dest = vec_dest(self.position.x, self.position.y)
            with sr.Microphone() as source:
                try:
                    audio = r.listen(source, timeout=5)
                    self.instruction = r.recognize_google(audio).lower()
                    printif("\nYou: " + str(self.instruction))
                except Exception:
                    # Recognition failures are treated as silence; Ctrl-C and
                    # interpreter exit are no longer swallowed.
                    self.instruction = ''
                    self.response = random.choice(self.silence_responses)
                    printif("\nYou: *silence*")
                    printif("(Hm? Can you please say that again?)")
        elif keys[pg.K_m]:
            self.key_used = "M"
            self.action_queue = []
            self.vel = vec(0, 0)
            self.dest = vec_dest(self.position.x, self.position.y)
            with sr.Microphone() as source:
                # call STT (speech to text) class to get the wav file to predict
                printif("listening...")
                try:
                    audio = r.listen(source, timeout=5)
                    self.game.morgan_speech.saveAudio(audio)
                    self.instruction = self.game.morgan_speech.getTranscription().lower()
                    printif("You: " + str(self.instruction))
                except Exception:
                    self.response = random.choice(self.silence_responses)
                    printif("Hm? Can you please say that again?")
                    self.instruction = ''

        # ## TEXT-ONLY INPUT
        # self.instruction = input("\nType something: ").lower()

        return self.instruction

    def interpret(self):
        """
        The Agent processes the instruction (temporarily stored in self) into
        1) words from its lexicon and learned phrases
        2) a list of actions to carry out

        Returns the tuple (recognized, actions).
        """
        recognized = []
        actions = []
        instruction = self.instruction  # the input string from the user
        lexicon = self.knowledge.lexicon()
        learned = self.knowledge.learned()
        instruction_minus_phrases = instruction

        # First check for learned phrases
        for phrase in learned:
            if phrase in instruction:
                printif("found the phrase: " + str(phrase))
                recognized.append(phrase)
                actions.append(learned[phrase])
                # Remove the phrase from the running remainder so that every
                # matched phrase is stripped, not only the last one.
                instruction_minus_phrases = instruction_minus_phrases.replace(phrase, " ")

        # Then check for remaining recognized words in the lexicon
        for word in instruction_minus_phrases.split():
            if word in lexicon:
                recognized.append(word)
                actions.append(lexicon[word])

        self.recognized = recognized
        self.actions = actions
        self.input_to_actions = list(zip(self.recognized, self.actions))
        printif("recognized: " + str(self.recognized) + "\n action list: " + str(self.actions))
        return (recognized, actions)

    def compose_actions(self, actions):
        """
        Composes the actions into a meaningful sequence.
        (Here is where semantics are helpful...:))
        Returns the composed action sequence (a list of integer lists) e.g. [[1],[0],[3]]
        """
        printif("composing actions... " + str(actions))
        # Restructure actions list into list of single actions e.g. [[1],[0],[3]].
        # Note that each action is still inside a list e.g. [1].
        single_actions = [[action]
                          for action_list in actions
                          for action in action_list]

        # Remove 'move' action if a destination action is already given.
        # TODO: have this make use of Action object type properties
        move = 0
        destinations = [1, 2, 3, 4, 7, 9, 10]
        has_destination = any(action[0] in destinations
                              for action in single_actions)
        if [move] in single_actions and has_destination:
            # Build a new list instead of removing while iterating.
            single_actions = [a for a in single_actions if a != [move]]

        # If 'me' and 'yes' action are both present, remove the 'me' action. (Patchy fix)
        me = 8
        yes = 5
        if [me] in single_actions and [yes] in single_actions:
            single_actions = [a for a in single_actions if a != [me]]

        printif("composed: " + str(single_actions))
        return single_actions

    def store_action_queue(self):
        '''
        Stores the current parsed actions into the action queue.
        First composes actions into single list of actions
        e.g. from [[[0], [1]], [[1]]] to [[0],[1]]
        '''
        self.action_queue = self.compose_actions(self.actions)
        printif("stored action queue: " + str(self.action_queue))
        return self.action_queue

    def compose_feedback(self):
        """
        Composes feedback into input-based text response.
        TODO: Continue updating for improved input integration.
        """
        responses = ""
        for phrase, actions in self.input_to_actions:
            if len(actions) > 1 or len(phrase.split()) > 1:
                # Phrase is a learned phrase as evidenced by actions > 1
                # or phrase is multi-word: echo it back.
                responses += phrase + " "
            else:
                # Action integrates user input in response.
                action = actions[0]
                action_response = self.knowledge.actions[action](response_only=True, phrase=phrase)
                responses += action_response + " "

        if responses:
            self.response = responses
        else:
            # Agent responds to fully unfamiliar phrases by repeating instruction
            self.response = "how do I " + self.instruction + "?"
        printif(self.name + ": " + str(self.response))
        return self.response

    def attempt(self):
        '''
        Attempts the first action in the queue.
        '''
        action = self.action_queue[0][0]
        # Pairing with the transcript entry number allows repeated actions in
        # new queues.
        # TODO: make sure this allows for the same action twice
        action_info = [action, self.transcript.entry_number()]
        if action_info != self.current_action:
            # keeps the agent from re-calling the current action
            self.current_action = action_info
            self.knowledge.actions[action]()

    def pop_action(self):
        '''
        Pop first action from queue.
        '''
        popped = self.action_queue.pop(0)
        printif("popped: " + str(popped))
        if len(self.action_queue) == 0:
            printif("actions completed (" + str(self.action_queue) + ")")
        return popped

    def give_text_feedback(self):
        """Draw the current response in a white bubble on the game screen."""
        font = pg.font.Font(self.game.title_font, 15)
        textSurf = font.render(self.response, True, BLACK).convert_alpha()
        textSize = textSurf.get_size()
        # The bubble is twice the size of the rendered text.
        bubbleSurf = pg.Surface((textSize[0] * 2., textSize[1] * 2))
        textRect = bubbleSurf.get_rect()
        bubbleSurf.fill(WHITE)
        bubbleSurf.blit(textSurf, textSurf.get_rect(center=textRect.center))
        textRect.center = ((WIDTH/2), (450))
        self.game.screen.blit(bubbleSurf, textRect)

    def update(self):
        """Run one frame: listen, plan, act, animate and move."""
        self.listen()
        if self.instruction and not self.action_queue:
            printif("there is an instruction and no action queue yet")
            # Interpret instruction
            self.interpret()
            # Store action queue
            self.store_action_queue()
            # Compose feedback into response text
            self.compose_feedback()
            # Save to transcript
            self.transcript.store(self.key_used, self.instruction, self.action_queue.copy(), self.response)
            # Reset instruction
            self.instruction = ""
        if self.action_queue:
            # Attempt action in queue
            self.attempt()
        self.blink()
        self.rect = self.image.get_rect()
        self.rect.center = self.position
        still_moving = self.move_if_clear_path()
        if not still_moving and self.action_queue:
            printif("popping action now...")
            self.pop_action()
            # NOTE(review): nesting of the next three statements was
            # reconstructed from whitespace-collapsed source -- confirm.
            # If task completed, save the task string to the transcript
            if self.game.goal_completed:
                self.transcript.store_success(self.game.goal_completed[0])
            self.transcript.save()
def __init__(self, address, chats):
    """Wrap a new Transcript for *address* and *chats* in a SharedCell."""
    transcript = Transcript(address, chats)
    shared = SharedCell(transcript)
    self.cell = shared
def is_read_through(self, txts, mm):
    """Determines if event is read-through

    Scans *txts* for a second transcript (``txt2``) whose exons are matched
    by this event's alignment blocks, with no other transcript lying in the
    gap between ``self.txt`` and ``txt2``. When one is found and the two
    aliases are not of the same family, ``self.event_type`` is set to
    'read-through' and ``self.txt2`` to that transcript.

    NOTE(review): block nesting was reconstructed from whitespace-collapsed
    source, and the function may continue past the visible text -- confirm
    against the original file.
    """
    last_matched_block, last_matched_exon = self.last_matched()

    for txt2 in txts:
        # Candidate must be on the same strand and from the same model.
        if txt2.strand != self.txt.strand:
            continue

        if txt2.model != self.txt.model:
            continue

        # Skip the transcript itself (same name or same alias).
        if txt2.name == self.txt.name or txt2.alias == self.txt.alias:
            continue

        # Skip candidates the alignment span does not overlap, or that
        # overlap the first transcript itself.
        if not overlap([self.align_coords[0][0], self.align_coords[-1][1]], [txt2.txStart, txt2.txEnd]) or\
           overlap([self.txt.txStart, self.txt.txEnd], [txt2.txStart, txt2.txEnd]):
            continue

        # Skip candidates that overlap the last matched block.
        if overlap(last_matched_block, [txt2.txStart, txt2.txEnd]):
            continue

        result = mm.match_exons(self.contig, txt2.full_name(), self.align_coords, txt2.exons, txt2.chrom, strand=txt2.strand)

        # Every alignment block must have been matched to an exon of txt2.
        if result and len(result.matched_blocks) == len(self.align_blocks):
            exon_bounds_matched = True
            for i in range(len(result.matched_blocks)):
                # only 1 boundary has to be flush if it's terminal block
                if i == len(self.align_blocks) - 1:
                    if self.txt.txStart < txt2.txStart:
                        if self.align_coords[result.matched_blocks[i] - 1][0] != txt2.exons[result.matched_exons[i] - 1][0]:
                            exon_bounds_matched = False
                    else:
                        if self.align_coords[result.matched_blocks[i] - 1][1] != txt2.exons[result.matched_exons[i] - 1][1]:
                            exon_bounds_matched = False

                # both boundaries have to be flush if it's not terminal block
                else:
                    if not(self.align_coords[result.matched_blocks[i] - 1][0] == txt2.exons[result.matched_exons[i] - 1][0] and\
                           self.align_coords[result.matched_blocks[i] - 1][1] == txt2.exons[result.matched_exons[i] - 1][1]):
                        exon_bounds_matched = False

            if not exon_bounds_matched:
                continue

            # Gap between the two transcripts, whichever comes first.
            if self.txt.txStart < txt2.txStart:
                txt_span = [int(self.txt.txEnd) + 1, int(txt2.txStart) - 1]
            else:
                txt_span = [int(txt2.txEnd) + 1, int(self.txt.txStart) - 1]

            # make sure there is no transcripts in between the 1st and 2nd transcripts
            has_txt_between = False
            for t in txts:
                if t.name == self.txt.name or t.name == txt2.name:
                    continue
                if subsume([t.txStart, t.txEnd], txt_span):
                    has_txt_between = True
                    break

            if not has_txt_between:
                # Both aliases must be non-empty strings before the family
                # comparison is attempted.
                if self.txt.alias and txt2.alias and type(self.txt.alias) is str and type(txt2.alias) is str:
                    if not Transcript.same_family(self.txt.alias, txt2.alias):
                        self.event_type = 'read-through'
                        self.txt2 = txt2