from Seq1 import Seq

Practice = 1
Exercise = 9
Folder = "../Session-04/"
Ext = ".txt"
GenesFile = ["U5", "ADA", "FRAT1", "FXN", "RNU6_269P"]

print(f"------| Practice {Practice}, Exercise {Exercise} |------")

for gene in GenesFile:
    # Load the gene from its FASTA file
    sequence = Seq().read_fasta(Folder + gene + Ext)

    # Dictionary with the number of occurrences of each base
    counts = sequence.count()

    # Read the winner straight from the dictionary key instead of mapping the
    # position of the largest value onto a separately maintained list of
    # bases, which silently reports the wrong base when the orders differ.
    # NOTE(review): assumes the keys of count() are the base letters - confirm
    # against Seq1.
    most_frequent = max(counts, key=counts.get)

    print(f"Gene {gene}: Most frequent Base: {most_frequent}")
from Seq1 import Seq

print("-----| Practice 1, Exercise 4 |------")

# Three sequences: built without bases, with "ACTGA" and with "ASSFFFJ"
seq1 = Seq()
seq2 = Seq("ACTGA")
seq3 = Seq("ASSFFFJ")

# One report line per sequence, numbered from 1
for number, sequence in enumerate((seq1, seq2, seq3), start=1):
    print(f"Sequence {number}: (Length: {Seq.len(sequence)}) {sequence}")
def comp_cmd(strseq):
    """Wrap *strseq* in a Seq and return the result of its complement() method."""
    return Seq(strseq).complement()
PORT = 8080
FOLDER = "../Session-04/"
EXT = ".txt"
GENE = "FRAT1"

# -- Length of fragments
LENGTH = 10

# -- Two clients on the same IP: the second one targets the next port
c1 = Client(IP, PORT)
c2 = Client(IP, PORT + 1)
clients = (c1, c2)

# -- Show the IP and PORT of each client
for client in clients:
    print(client)

# -- Load the gene from its file and keep its text form
s = Seq().read_fasta(FOLDER + GENE + EXT)
bases = str(s)

# -- Show the gene on the console
print(f"Gene {GENE}: {bases}")

# -- Announce the transfer to both servers, first c1 and then c2
init_msg = f"Sending {GENE} Gene to the server, in fragments of {LENGTH} bases..."
for client in clients:
    client.talk(init_msg)
from Seq1 import Seq

print("Practice 1, exercise 7")

# Sequences built without bases, with real bases and with arbitrary text
s1 = Seq()
s2 = Seq("GATCCTAGGACGTA")
s3 = Seq("Invalid seq")

# Same three-line report for every sequence, numbered from 1
for number, seq in enumerate((s1, s2, s3), start=1):
    print(f"Sequence {number}: length {seq.len()} {seq}")
    print(f"Bases:{seq.count()}")
    print(f"Reverse:{seq.reverse()}")
from Seq1 import Seq

FOLDER = "../Session-04/"
GENE = "U5"

seq_1 = Seq("")
# NOTE(review): the return value is ignored and the path carries no file
# extension - confirm that read_fasta() fills seq_1 in place and accepts
# this path.
seq_1.read_fasta(FOLDER + GENE)

print(f"Sequence : (Length: {seq_1.len()}) {seq_1}")

# Remaining report lines: each label with the method that produces its value,
# called only when its line is printed
report = (
    (" Bases", seq_1.count),
    (" Rev", seq_1.reverse),
    (" Comp", seq_1.complement),
)
for label, method in report:
    print(f"{label}: {method()}")
def comp_seq(in_msg):
    """Print the complement of the sequence carried by *in_msg* and send it.

    The sequence text is taken from ``spl_in(in_msg)``; the encoded complement
    is sent through the module-level ``cs`` and the result of ``cs.send`` is
    returned.
    """
    complement = Seq(spl_in(in_msg)).complement()
    print(complement)
    payload = str.encode(complement)
    return cs.send(payload)
exit() r1 = conn.getresponse() print(f"Response received!: {r1.status} {r1.reason}\n") data1 = r1.read().decode() gene = json.loads(data1) termcolor.cprint(f"Gene: ", "yellow", end="") print(f"{namegene}") termcolor.cprint("Description: ", "yellow", end="") print(f"{gene['desc']}" ) #see esembl page for desc and seq (especial functions) sequence = gene['seq'] s = Seq(sequence) l = s.length() ac = s.count_base("A") tc = s.count_base("T") cc = s.count_base("C") gc = s.count_base("G") termcolor.cprint("Total length: ", "yellow", end="") print(l) resp = f""" A: {ac} ({round((ac / l) * 100)})% C: {cc} ({round((cc / l) * 100)})% T: {tc} ({round((tc / l) * 100)})% G: {gc} ({round((gc / l) * 100)})%""" print(resp) dictionary = s.count()
from Seq1 import Seq

# IMPORTANT: a sequence is created by passing a string with the bases to Seq

print("----| Exercise 1 |-----")

s1 = Seq("ACTGA")
length = s1.len()

# The double space after "Length:" and the space before ")" are intentional:
# they reproduce print()'s separator between its arguments.
print(f"Sequence 1: (Length:  {length} ) {s1}")
from Seq1 import Seq


def print_result(i, gen):
    """Print the length, text form and base counts of one sequence.

    i   -- number shown after the word "Sequence"
    gen -- object providing length() and count_bases(); entries 0 to 3 of
           count_bases() are printed under the labels A, C, G and T
    """
    # Count once and reuse the result instead of recounting for every label
    counts = gen.count_bases()
    print(f"Sequence {i}: (LENGTH: {gen.length()}) {gen}\n"
          f"A: {counts[0]} C: {counts[1]} G: {counts[2]} T: {counts[3]}")


print("------EXCERSISE 5-------")

gen = Seq()
gen_1 = Seq("ATATAT")
gen_2 = Seq("Invalid Sequence")
gen_list = [gen, gen_1, gen_2]

# Number the reports from 1
for number, sequence in enumerate(gen_list, start=1):
    print_result(number, sequence)
# Read the response's body
data1 = r1.read().decode()

# Parse the received JSON document
gene = json.loads(data1)

termcolor.cprint("Gene: ", 'yellow', end="")
print(GENENAME)

termcolor.cprint("Description: ", 'yellow', end="")
print(gene['desc'])

termcolor.cprint("Bases: ", 'yellow', end="")
print(gene['seq'])

# Wrap the received bases in a Seq and measure it
gen = gene['seq']
s = Seq(gen)
sl = s.len()

# Occurrences of each base, then their share of the total length as a
# percentage rounded to two decimals
counta, countc, countg, countt = (s.seq_count_bases(base) for base in "ACGT")
porta, portc, portg, portt = (
    round(100 * count / sl, 2) for count in (counta, countc, countg, countt)
)

termcolor.cprint("Total lenght: ", "yellow", end="")
print(sl)

termcolor.cprint("A: ", "blue", end="")
print(counta, ",", porta, "%")
"RBMY2YP": "ENSG00000227633", "FGFR3": "ENSG00000068078", "KDR": "ENSG00000128052", "ANK2": "ENSG00000145362" } SERVER = "rest.ensembl.org" ENDPOINT = "/sequence/id/" PARAMETERS = "?content-type=application/json" connection = http.client.HTTPConnection(SERVER) try: for gene in gene_dict.keys(): id = gene_dict[gene] connection.request("GET", ENDPOINT + id + PARAMETERS) response = connection.getresponse() print(f"SERVER: {SERVER}") print(f"URL: {SERVER + ENDPOINT + PARAMETERS}") print(f"Response received!: {response.status} {response.reason} \n") if response.status == 200: response = json.loads(response.read().decode()) print(f"Gene: {gene}") print("Description:", response["desc"]) sequence = Seq(response["seq"]) print(sequence.info()) print(f"Most frequent base: {sequence.most_frequent_base()} \n") except KeyError: print("The gene is not inside the data base. Choose between the following:", list(gene_dict.keys()))
def do_GET(self):
    """Answer a GET request with an HTML page or a JSON document.

    The path of the request target selects the endpoint: "/",
    "/listSpecies", "/karyotype", "/chromosomeLength", "/geneSeq",
    "/geneInfo", "/geneCalc" or "/geneList".  Parameters are read by
    position from the query string (their names are not checked).  One
    extra trailing parameter selects the JSON form of the answer and must
    be exactly "json=1".

    Every request gets a complete response.  Unknown endpoints, a wrong
    number of parameters, a wrong JSON flag and KeyError / TypeError /
    ValueError / IndexError raised while building the answer all produce
    the error document (error.json for JSON requests, error.html
    otherwise) with status 404.
    """
    # Local import: in this file the bare name `html` is the page-header
    # helper called below, so only `escape` is brought into scope.
    from html import escape

    def value_of(pair):
        """Return what follows the first '=' of a ``name=value`` pair."""
        return pair.split("=")[1]

    def error_page(json_requested):
        """Return the text of the error document for the requested format."""
        return Path('error.json' if json_requested else 'error.html').read_text()

    def find_gene_id(gene):
        """Return the id of the first homo_sapiens cross-reference of *gene*."""
        return get_info(f"/xrefs/symbol/homo_sapiens/{gene}?")[0]["id"]

    def list_species(as_json, limit):
        """Names of the species, cut to *limit* entries unless it is empty."""
        species = get_info("info/species?")["species"]
        names = [entry["display_name"] for entry in species]
        if limit != "":
            # A negative limit selects nothing instead of slicing from the end
            names = names[:max(int(limit), 0)]
        if as_json:
            return dict_listSpecies(limit, names)
        page = html("LIST OF SPECIES IN THE BROWSER", "lightblue")
        # Report the real number of species instead of a hard-coded figure
        page += f"""<h>The total number of species in ensembl is: {len(species)}</h><br>"""
        page += f"""<h>The limit you have selected is: {escape(limit)}</h><br>"""
        page += """<h>The names of the species are:</h>"""
        page += "".join(f"""<p> • {name}</p>""" for name in names)
        return page

    def karyotype(as_json, specie):
        """Chromosome names of the assembly of *specie*."""
        chromosomes = get_info("info/assembly/" + specie + "?")["karyotype"]
        if as_json:
            return dict_karyotype(list(chromosomes))
        page = html("KARYOTYPE OF A SPECIFIC SPECIES", "lightblue")
        page += """<h> The names of the chromosomes are: </h>"""
        page += "".join(f"""<p> • {name}</p>""" for name in chromosomes)
        return page

    def chromosome_length(as_json, specie, chromo):
        """Length of the top-level region of *specie* named *chromo*."""
        regions = get_info("info/assembly/" + specie + "?")["top_level_region"]
        lengths = [region["length"] for region in regions if region["name"] == chromo]
        if not lengths:
            # Unknown chromosome: answer with the error page rather than
            # failing on an unassigned variable
            raise KeyError(chromo)
        length = lengths[-1]
        if as_json:
            return dict_chromosomeLength(length)
        page = html("LENGTH OF THE CHROMOSOME SELECTED", "lightblue")
        page += f"""<h> The length of the chromosome is: {length}</h>"""
        return page

    def gene_seq(as_json, gene):
        """Sequence of *gene*."""
        info = get_info(f"/sequence/id/{find_gene_id(gene)}?")
        if as_json:
            return dict_geneSeq(info["seq"])
        page = html("GENE SEQUENCE", "lightyellow")
        page += f'<p> The sequence of gene {escape(gene)} is: </p>'
        page += f'<textarea rows="100" cols="500"> {info["seq"]} </textarea>'
        return page

    def gene_info(as_json, gene):
        """Start, end, length, id and chromosome of *gene*."""
        info = get_info(f"/lookup/id/{find_gene_id(gene)}?")
        length = info["end"] - info["start"]
        if as_json:
            return dict_geneInfo(info["start"], info["end"], length,
                                 info["id"], info["seq_region_name"])
        page = html("INFO ABOUT A GENE", "lightyellow")
        page += f'<h1> Information about the introduced gene: {escape(gene)}</h1>'
        page += f'<p> The start point is: {info["start"]}</p>'
        page += f'<p> The end point is: {info["end"]}</p>'
        page += f'<p> The length of the gene is: {length}</p>'
        page += f'<p> The id of the gene is: {info["id"]}</p>'
        page += f'<p> The chromosome of that gene is: {info["seq_region_name"]}</p>'
        return page

    def gene_calc(as_json, gene):
        """Total length of *gene* and the second item of count_base() per base."""
        sequence = Seq(get_info(f"/sequence/id/{find_gene_id(gene)}?")["seq"])
        if as_json:
            bases = [sequence.count_base(base)[1] for base in list_bases]
            return dict_geneCalc(sequence.len(), bases)
        page = html("BASES CALCULATION", "lightyellow")
        page += f'<h1> Calculations over the introduced gene: {escape(gene)}</h1>'
        page += f'<p> Total length of this gene is: {sequence.len()}</p>'
        page += '<p> The percentage of each base in the sequence of this gene is:</p>'
        for base in list_bases:
            page += f"<p>{base}: ({sequence.count_base(base)[1]}%)</p>"
        return page

    def gene_list(as_json, chromo, start, end):
        """Names of the genes overlapping chromo:start-end."""
        genes = get_info(f"/overlap/region/human/{chromo}:{start}-{end}?feature=gene;")
        if as_json:
            return dict_geneList([entry["external_name"] for entry in genes])
        page = html("LIST OF GENES OF A CHROMOSOME", "lightyellow")
        page += f'<h2> List of genes located in the introduced chromosome: {escape(chromo)}</h2>'
        page += "".join(f'<p>- {entry["external_name"]}</p>' for entry in genes)
        return page

    # endpoint -> (number of positional parameters, function building the answer)
    handlers = {
        "/listSpecies": (1, list_species),
        "/karyotype": (1, karyotype),
        "/chromosomeLength": (2, chromosome_length),
        "/geneSeq": (1, gene_seq),
        "/geneInfo": (1, gene_info),
        "/geneCalc": (1, gene_calc),
        "/geneList": (3, gene_list),
    }

    termcolor.cprint(self.requestline, 'green')
    req_line = self.requestline.split()[1]
    init = req_line.split("?")[0]

    status = 200
    as_json = False
    try:
        if init == "/":
            contents = Path('main-page.html').read_text()
        elif init in handlers:
            expected, handler = handlers[init]
            # A request without a query string raises IndexError here
            values = req_line.split("?")[1].split("&")
            if len(values) == expected + 1:
                # The extra trailing parameter is the JSON flag
                as_json = True
                if values.pop() != "json=1":
                    raise ValueError("the JSON flag must be json=1")
            elif len(values) != expected:
                raise ValueError("unexpected number of parameters")
            contents = handler(as_json, *map(value_of, values))
        else:
            status = 404
            contents = error_page(False)
    except (KeyError, TypeError, ValueError, IndexError):
        status = 404
        contents = error_page(as_json)

    # Encode once so that Content-Length matches the bytes actually written
    body = str.encode(contents)
    self.send_response(status)
    self.send_header('Content-Type', "application/json" if as_json else "text/html")
    self.send_header('Content-Length', str(len(body)))
    self.end_headers()
    self.wfile.write(body)
# -- Read the response's body
data1 = r1.read().decode("utf-8")

# -- Parse the received JSON document
console = json.loads(data1)

termcolor.cprint('GENE:', 'green')
print(element)

termcolor.cprint("Description:", 'green')
print(console['desc'])

termcolor.cprint('Bases:', 'green')
print(console['seq'])

# -- Wrap the bases in a Seq and measure it once
sequence = Seq(console['seq'])
total_length = sequence.len()

termcolor.cprint('Total length:', 'green')
print(total_length)

# -- For every base: its number of occurrences and its share of the total
for base in ('A', 'C', 'G', 'T'):
    count = sequence.count_base(base)
    termcolor.cprint(f'{base}:', 'blue')
    print(count)
    # An empty sequence has no shares to compute; avoid dividing by zero
    percentage = count * 100 / total_length if total_length else 0.0
    print('(', percentage, '%)')
def rev(cs, argument):
    """Handle the REV command: print and send the reverse of *argument*.

    cs       -- object whose send() receives the encoded reverse
    argument -- text used to build the Seq
    """
    print_colored("REV", "green")
    reversed_bases = Seq(argument).reverse()
    print(reversed_bases)
    cs.send(reversed_bases.encode())
from Seq1 import Seq

PRACTICE = 1
EXERCISE = 2

print(f"-----| Practice {PRACTICE}, Exercise {EXERCISE} |------")

# -- One sequence created without bases and one created from "ACTGA"
s1 = Seq()
s2 = Seq("ACTGA")

# -- Show them, numbered from 1
for number, sequence in enumerate((s1, s2), start=1):
    print(f"Sequence {number}: {sequence}")
from Seq1 import Seq

print("-----| Exercise 1 |------")

# Build the sequence, then report its length next to its text form
seq1 = Seq("ACTGA")
length = seq1.len()
print(f"Sequence 1: (Length: {length}) {seq1}")
from Seq1 import Seq

# -- A sequence created without bases and one created from "TATAC"
s_null = Seq()
s = Seq("TATAC")

# -- Show both, numbered from 1
for number, sequence in enumerate((s_null, s), start=1):
    print(f"Sequence {number}:", sequence)
from Seq1 import Seq

folder = "../Session-04/"
list_genes = ["U5.txt", "FRAT1.txt", "FXN.txt", "ADA.txt", "RNU6_269P.txt"]
bases_list = ["A", "C", "G", "T"]

s = Seq("")
for e in list_genes:
    s = s.read_fasta(folder + e)
    counts = s.count()

    # First base with the highest positive count.  The result is computed
    # afresh for every gene, so a gene without any counted base prints an
    # empty name instead of the winner of the previous gene.
    max_base = max(
        (base for base, amount in counts.items() if amount > 0),
        key=counts.get,
        default="",
    )

    print("Gene", e, ":", "The most repeated base is:", max_base)
from Seq1 import Seq

sequence = Seq("ACTGA")

# Header followed by one empty line
print("-----| Practice 1, Exercise 1 |------\n")

# Length and text form of the sequence on a single line
print(f"Sequence 1: (Lenght : {sequence.len()}) {sequence}")
def rev_seq(in_msg):
    """Print the reverse of the sequence carried by *in_msg* and send it.

    The sequence text is taken from ``spl_in(in_msg)``; the encoded reverse is
    sent through the module-level ``cs`` and the result of ``cs.send`` is
    returned.
    """
    reversed_bases = Seq(spl_in(in_msg)).reverse()
    print(reversed_bases)
    payload = str.encode(reversed_bases)
    return cs.send(payload)
from Seq1 import Seq


def print_result(i, sequence):
    """Print a four-line report about *sequence*.

    i        -- text or number placed right after the word "Sequence"
    sequence -- object providing len(), count(), reverse() and complement()
    """
    print(f"Sequence{i}: (Length:{sequence.len()}) {sequence}")

    # Each label with the method producing its value, called only when its
    # line is printed
    report = (
        ('Bases:', sequence.count),
        ('Rev:', sequence.reverse),
        ('Complement:', sequence.complement),
    )
    for label, method in report:
        print(label, method())


print('-----|Practice 1, Exercise 9|-----')

s1 = Seq()
s1.read_fasta('ADA.txt')
print_result('', s1)
from client0 import Client
from Seq1 import Seq

PRACTICE = 2
EXERCISE = 1

# NOTE(review): the header prints the literals 2 and 7, not PRACTICE and
# EXERCISE (EXERCISE is 1) - confirm which exercise number is intended.
print(f"-----| Practice {2}, Exercise {7} |------")

IP = "127.0.0.1"
PORT = 12000
PORT_2 = 12002

c = Client(IP, PORT)
c_2 = Client(IP, PORT_2)

s = Seq()
s.read_fasta('../Session-04/FRAT1.txt')

# At most ten fragments of ten bases each, numbered from 1
for count, i in zip(range(1, 11), range(0, len(s.str_bases), 10)):
    fragment = s.str_bases[i:i + 10]
    fragment_text = f"Fragment {count}: {fragment}"
    print(fragment_text)

    # Even fragments go through the second client, odd ones through the first
    target = c_2 if count % 2 == 0 else c
    print(target.debug_talk(fragment_text))
from Seq1 import Seq

print("-----| Practice 1, Exercise 5 |------")

seq1 = Seq()
seq2 = Seq("ACTGA")
seq3 = Seq("Invalid sequence")

sequence_list = [seq1, seq2, seq3]

# enumerate() supplies the position directly; looking each element up again
# with list.index() rescans the list and returns the position of the first
# element that compares equal, which is wrong when two sequences are equal.
for position, sequence in enumerate(sequence_list):
    a, c, t, g = Seq.count_base(sequence)
    bases_dict = {
        "A": a,
        "C": c,
        "T": t,
        "G": g,
    }
    print(f"Sequence {position}: (Length: {Seq.len(sequence)}) {sequence}")
    print(bases_dict)
from Seq1 import Seq

seq_list = [Seq("ACT")]

# Number the sequences from 1 with enumerate() instead of searching the list
# for each element, which rescans it and misnumbers equal sequences.
for number, sequence in enumerate(seq_list, start=1):
    print("Sequence", number, ":(Length:", sequence.len(), ")", sequence)
from Seq1 import Seq

print("-----| Practice 1, Exercise 10 |------")

FOLDER = "../Session04/"
EXT = ".txt"
GENES = ["U5", "ADA", "FRAT1", "FXN", "RNU6_269P"]

for g in GENES:
    # Load the gene from its FASTA file
    se = Seq().seq_read_fasta(FOLDER + g + EXT)

    # Dictionary with the number of occurrences of each base
    dictionary = se.count()

    # Read the winner straight from the dictionary key instead of mapping the
    # position of the largest value onto a separately maintained list of
    # bases, which silently reports the wrong base when the orders differ.
    # NOTE(review): assumes the keys of count() are the base letters - confirm
    # against Seq1.
    most_common = max(dictionary, key=dictionary.get)

    print("Gene ", g, ": Most frequent Base:", most_common)
# Session 7. Exercise 7
from Seq1 import Seq

print("-----| Practice 1, Exercise 7 |------")

# -- null sequence
s1 = Seq()
# -- valid sequence
s2 = Seq("ACTGA")
# -- invalid sequence
s3 = Seq("Invalid sequence")

# Named `sequences` so that the builtin `list` stays usable
sequences = [s1, s2, s3]

# enumerate() supplies the position directly instead of searching the list
# again for every element
for position, seq in enumerate(sequences):
    print("Sequence", position, ": (Lenght:", seq.len(), ")", seq)
    print(" Bases:", seq.count())
    print(" Rev: ", seq.reverse())
def comp(cs, argument):
    """Handle the COMP command: print and send the complement of *argument*.

    cs       -- object whose send() receives the encoded complement
    argument -- text used to build the Seq
    """
    print_colored("COMP", "green")
    complement_bases = Seq(argument).complement()
    print(complement_bases)
    cs.send(complement_bases.encode())
def rev_cmd(strseq):
    """Wrap *strseq* in a Seq and return the result of its reverse() method."""
    return Seq(strseq).reverse()
# -- Read the response's body
data1 = r1.read().decode("utf-8")

# -- Parse the received JSON document
response = json.loads(data1)

termcolor.cprint("Gene: ", "green", end="")
print(ask_user)

termcolor.cprint("Description: ", "green", end="")
print(response['desc'])

# -- Wrap the bases in a Seq and measure it once
seq_bases = response['seq']
sequence = Seq(seq_bases)
total_length = sequence.len()

termcolor.cprint("Total length: ", "green", end="")
print(total_length)

# -- Occurrences and percentage of every base; each base is counted once
for base in bases:
    count = sequence.count_base(base)
    # An empty sequence has no shares to compute; avoid dividing by zero
    percentage = round(count * (100 / total_length), 2) if total_length else 0.0
    termcolor.cprint(f"{base}", 'blue', end="")
    print(f": {count} ({percentage}%)")

# -- Most frequent base: position of the largest count, looked up in `bases`
# NOTE(review): relies on seq_count() listing its values in the same order as
# `bases` - confirm.
d = sequence.seq_count()
ll = list(d.values())
m = max(ll)
termcolor.cprint("Most frequent Base:", 'green', end="")
print(f"{bases[ll.index(m)]}")