def write_filtered_sequences_and_stats(sequence_dict, sequence_report_general, filtered_sequence_file,
                                       saveExcludedContigs):
    """
    Write the retained contigs, their tabular report and, optionally, the discarded contigs

    Parameters
    ----------
    sequence_dict : dict
        Contig records keyed by consecutive integers starting at 1. Each record provides the 'header', 'length',
        'sequence' and 'discard' entries
    sequence_report_general : dict
        General statistics. The values under ['filtered']['contigs'] and ['filtered']['bp'] are written to the
        '#general' section of the report
    filtered_sequence_file : str
        Path of the fasta file that receives the contigs that are not discarded. The report is written next to it as
        'assembly_mapping_report.sequences_filtered.<basename without extension>.tab'
    saveExcludedContigs : bool
        When true, discarded contigs are written to '<path without extension>.excluded_contigs.fasta'. That file is
        deleted again if no contig was discarded

    Returns
    -------
    None
    """
    output_dir = os.path.dirname(filtered_sequence_file)
    output_stem = os.path.splitext(os.path.basename(filtered_sequence_file))[0]
    report_path = os.path.join(output_dir, str('assembly_mapping_report.sequences_filtered.' + output_stem) + '.tab')

    path_excluded_contigs = None
    excluded_contigs = None
    if saveExcludedContigs:
        path_excluded_contigs = os.path.splitext(filtered_sequence_file)[0] + '.excluded_contigs.fasta'
        excluded_contigs = open(path_excluded_contigs, 'wt')

    found_excluded_contigs = False
    try:
        with open(report_path, 'wt') as report_filtered:
            with open(filtered_sequence_file, 'wt') as contigs_filtered:
                fields = ['header', 'length']
                report_filtered.write('\n'.join(['#general',
                                                 '>contigs', str(sequence_report_general['filtered']['contigs']),
                                                 '>bp', str(sequence_report_general['filtered']['bp'])]) + '\n')
                report_filtered.write('#' + '\t'.join(fields) + '\n')

                # Keys are walked as 1..N so the output order does not depend on dict ordering
                for i in range(1, len(sequence_dict) + 1):
                    record = sequence_dict[i]
                    if not record['discard']:
                        report_filtered.write('\t'.join([str(record[f]) for f in fields]) + '\n')
                        contigs_filtered.write('>' + record['header'] + '\n' +
                                               '\n'.join(utils.chunkstring(record['sequence'], 80)) + '\n')
                    elif saveExcludedContigs:
                        found_excluded_contigs = True
                        excluded_contigs.write('>' + record['header'] + '\n' +
                                               '\n'.join(utils.chunkstring(record['sequence'], 80)) + '\n')
    finally:
        # Always release the handle, even when a write or a lookup above fails
        if excluded_contigs is not None:
            excluded_contigs.close()

    # Do not leave an empty excluded-contigs file behind
    if saveExcludedContigs and not found_excluded_contigs:
        os.remove(path_excluded_contigs)
def plot_graphics(self, data):
    """Redraw the temperature and pressure graphs from parsed log rows.

    Every row ``x`` of ``data`` is read by index: ``x[0]`` is passed to
    ``chunkstring(x[0], 2)`` and the pieces are joined with ':' to build the
    tick label, while ``x[1]`` .. ``x[4]`` are converted with ``float`` and
    plotted as the series labelled T1, T2 (temperature graph) and P1, P2
    (pressure graph). The x coordinate of a row is its position in ``data``.

    Any error raised while parsing or drawing is reported to the user in a
    warning box instead of being propagated.
    """
    global ticks
    try:
        # Parse into locals first so that a corrupt row cannot leave the
        # module-level tick labels out of sync with what is still drawn.
        new_ticks = [':'.join(chunkstring(x[0], 2)) for x in data]
        print(data)
        print(new_ticks)
        int_time = list(range(len(data)))
        temp1 = [float(x[1]) for x in data]
        temp2 = [float(x[2]) for x in data]
        press1 = [float(x[3]) for x in data]
        press2 = [float(x[4]) for x in data]

        # All rows are valid: publish the labels before the axes are redrawn.
        # NOTE(review): tick_func presumably reads the global `ticks` - confirm.
        ticks = new_ticks

        for graph in (self.temp_graph, self.press_graph):
            graph.clear()
            graph.axis.xaxis.set_major_formatter(FuncFormatter(tick_func))
            graph.timelen = len(data)

        self.temp_graph.axis.plot(int_time, temp1, 'C1', label='T1')
        self.temp_graph.axis.plot(int_time, temp2, 'C3', label='T2')
        self.temp_graph.axis.legend()

        self.press_graph.axis.plot(int_time, press1, 'C4', label='P1')
        self.press_graph.axis.plot(int_time, press2, 'C2', label='P2')
        self.press_graph.axis.legend()

        self.temp_graph.canvas.draw()
        self.press_graph.canvas.draw()
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not mistaken for a damaged log file.
        QtWidgets.QMessageBox.warning(self.MainWindow, "Ошибка", "Лог-файл поврежден!")
def write_filtered_sequences_and_stats(sequence_dict, sequence_report_general, filtered_sequence_file,
                                       saveExcludedContigs):
    """
    Split the contigs into a filtered fasta file (plus report) and, on request, an excluded fasta file

    Parameters
    ----------
    sequence_dict : dict
        Contig records keyed by consecutive integers starting at 1. The 'header', 'length', 'sequence' and 'discard'
        entries of each record are used
    sequence_report_general : dict
        Holds the totals written to the report under ['filtered']['contigs'] and ['filtered']['bp']
    filtered_sequence_file : str
        Output fasta path for the contigs whose 'discard' entry is false. The tabular report
        'assembly_mapping_report.sequences_filtered.<basename without extension>.tab' is created in the same folder
    saveExcludedContigs : bool
        Whether the discarded contigs are saved to '<path without extension>.excluded_contigs.fasta'. The file is
        removed when it would otherwise stay empty

    Returns
    -------
    None
    """

    def fasta_entry(record):
        # One fasta record with the sequence wrapped by utils.chunkstring(..., 80)
        return '>' + record['header'] + '\n' + '\n'.join(utils.chunkstring(record['sequence'], 80)) + '\n'

    path_without_ext = os.path.splitext(filtered_sequence_file)[0]
    report_name = str('assembly_mapping_report.sequences_filtered.' + os.path.basename(path_without_ext)) + '.tab'
    report_path = os.path.join(os.path.dirname(filtered_sequence_file), report_name)

    path_excluded_contigs = path_without_ext + '.excluded_contigs.fasta'
    excluded_contigs = open(path_excluded_contigs, 'wt') if saveExcludedContigs else None

    found_excluded_contigs = False
    try:
        with open(report_path, 'wt') as report_filtered:
            with open(filtered_sequence_file, 'wt') as contigs_filtered:
                fields = ['header', 'length']
                general_lines = ['#general',
                                 '>contigs', str(sequence_report_general['filtered']['contigs']),
                                 '>bp', str(sequence_report_general['filtered']['bp'])]
                report_filtered.write('\n'.join(general_lines) + '\n')
                report_filtered.write('#' + '\t'.join(fields) + '\n')

                for i in range(1, len(sequence_dict) + 1):
                    contig = sequence_dict[i]
                    if contig['discard']:
                        if saveExcludedContigs:
                            found_excluded_contigs = True
                            excluded_contigs.write(fasta_entry(contig))
                        continue
                    report_filtered.write('\t'.join([str(contig[f]) for f in fields]) + '\n')
                    contigs_filtered.write(fasta_entry(contig))
    finally:
        # The excluded-contigs handle lives outside the `with` blocks, so close it explicitly on every exit path
        if excluded_contigs is not None:
            excluded_contigs.close()

    if saveExcludedContigs and not found_excluded_contigs:
        # Nothing was discarded: delete the empty placeholder file
        os.remove(path_excluded_contigs)
def on_finish_fetching(self):
    """Handle the end of the file-list request and fill the file list widget.

    When ``pyboard.last_operation`` is truthy, the received byte values in
    ``pyboard.recieved_file`` are decoded with ``chr``. Unless the decoded
    text is exactly 'no', it is cut with ``chunkstring(..., 10)``, each piece
    is cut again with ``chunkstring(..., 2)`` and joined with '.', and the
    resulting names are added to ``self.files_list``. Otherwise an error box
    is shown. ``self.fetching_list`` is cleared in every case.
    """
    if pyboard.last_operation:
        self.update_status('Выберите файл')
        files_raw = ''.join([chr(x) for x in pyboard.recieved_file])
        if files_raw != 'no':
            files = ['.'.join(chunkstring(x, 2)) for x in chunkstring(files_raw, 10)]
            print(files)
            for filename in files:
                self.files_list.addItem(filename)
            self.open_butt.setEnabled(True)
    else:
        # QMessageBox has no `error` static function; `critical` is the
        # error-level dialog. The old call raised AttributeError, which also
        # skipped the flag reset below.
        QtWidgets.QMessageBox.critical(self, "Ошибка", "Не удалось получить список файлов")
    self.fetching_list = False
def clean_novel_alleles(novel_alleles, scheme_mlst, profile):
    """
    Clean the fasta file with the novel alleles produced by mlst

    Only the sequences whose header starts with the scheme name and whose gene is reported with a '~' allele in the
    profile are kept. The original file is always removed and is rewritten only when at least one sequence is kept.
    An OSError (for instance a missing file) is reported on stdout and not raised.

    Parameters
    ----------
    novel_alleles : str
        Path for fasta file containing the novel alleles
    scheme_mlst : str
        MLST schema found by mlst
    profile : list
        List of strings with the profile found, each one formatted as 'gene(allele)'

    Returns
    -------
    None
    """
    unknown_genes = []
    for gene_allele in profile:
        gene = gene_allele.split('(')[0]
        try:
            allele = gene_allele.split('(')[1].rstrip(')')
            if allele.startswith('~'):
                unknown_genes.append(gene)
        except IndexError as e:
            # Profile entry without an '(allele)' part
            print('WARNING: {}'.format(e))

    try:
        novel_alleles_keep = {}
        if unknown_genes:
            # The context manager closes the reader even if parsing fails.
            # TODO: newline=None in Python3
            with open(novel_alleles, mode='rt') as reader:
                # Groups alternate between header lines and the sequence lines that follow them
                fasta_iter = (g for k, g in itertools_groupby(reader, lambda x: x.startswith('>')))
                for header_lines in fasta_iter:
                    # Builtin next() works on both Python 2.6+ and Python 3 iterators
                    header = next(header_lines)[1:].rstrip('\r\n')
                    # The default covers a trailing header that has no sequence lines
                    seq = ''.join(s.rstrip('\r\n') for s in next(fasta_iter, ()))
                    if header.startswith(scheme_mlst):
                        gene = header.split('.')[1].split('~')[0]
                        if gene in unknown_genes:
                            novel_alleles_keep[header] = seq

        os.remove(novel_alleles)

        if novel_alleles_keep:
            with open(novel_alleles, 'wt') as writer:
                for header, seq in novel_alleles_keep.items():
                    writer.write('>{}\n'.format(header))
                    writer.write('\n'.join(utils.chunkstring(seq, 80)) + '\n')
    except OSError as e:  # FileNotFoundError is a subclass of OSError in Python3
        print('An unknown ST was found but no novel alleles fasta file was produced by mlst software:\n'
              '{}'.format(e))
def on_download_existing_finished(self, status):
    """Refresh the recipe list once the list of existing recipes has arrived.

    The byte values in ``pyboard.recieved_file`` are decoded with ``chr``.
    Unless the decoded text is exactly 'no', it is handed to
    ``chunkstring(..., 3)`` and every piece replaces the previous content of
    ``self.recipes_list``. The open and save buttons are then enabled and a
    notice is shown through ``self.warn``. ``status`` is not used.
    """
    print("BEGIN PARSING")
    received = pyboard.recieved_file
    print(received)
    listing = ''.join(map(chr, received))
    print("EDITOR data", listing)

    if listing != 'no':
        recipe_names = chunkstring(listing, 3)
        self.recipes_list.clear()
        for recipe_name in recipe_names:
            self.recipes_list.addItem(recipe_name)

    for button in (self.open_button, self.save_button):
        button.setEnabled(True)
    self.warn('Список существующих рецептов получен!')
def clean_novel_alleles(novel_alleles, scheme_mlst, profile):
    """
    Clean the fasta file with the novel alleles produced by mlst

    Only the sequences whose header starts with the scheme name and whose gene is reported with a '~' allele in the
    profile are kept. The original file is removed and is rewritten only when at least one sequence is kept. Errors
    raised while reading, removing or writing the file are propagated to the caller.

    Parameters
    ----------
    novel_alleles : str
        Path for fasta file containing the novel alleles
    scheme_mlst : str
        MLST schema found by mlst
    profile : list
        List of strings with the profile found, each one formatted as 'gene(allele)'

    Returns
    -------
    None
    """
    unknown_genes = []
    for gene_allele in profile:
        gene = gene_allele.split('(')[0]
        try:
            allele = gene_allele.split('(')[1].rstrip(')')
            if allele.startswith('~'):
                unknown_genes.append(gene)
        except IndexError as e:
            # Profile entry without an '(allele)' part
            print('WARNING: {}'.format(e))

    novel_alleles_keep = {}
    if unknown_genes:
        # The context manager closes the reader even if parsing fails.
        # TODO: newline=None in Python3
        with open(novel_alleles, mode='rt') as reader:
            # Groups alternate between header lines and the sequence lines that follow them
            fasta_iter = (g for k, g in itertools_groupby(reader, lambda x: x.startswith('>')))
            for header_lines in fasta_iter:
                # Builtin next() works on both Python 2.6+ and Python 3 iterators
                header = next(header_lines)[1:].rstrip('\r\n')
                # The default covers a trailing header that has no sequence lines
                seq = ''.join(s.rstrip('\r\n') for s in next(fasta_iter, ()))
                if header.startswith(scheme_mlst):
                    gene = header.split('.')[1].split('~')[0]
                    if gene in unknown_genes:
                        novel_alleles_keep[header] = seq

    os.remove(novel_alleles)

    if novel_alleles_keep:
        with open(novel_alleles, 'wt') as writer:
            for header, seq in novel_alleles_keep.items():
                writer.write('>{}\n'.format(header))
                writer.write('\n'.join(utils.chunkstring(seq, 80)) + '\n')
def on_build_graph(self):
    """Let the user pick a log file, parse it and plot its content.

    Nothing happens when the file dialog returns an empty path. Only lines
    that are exactly 24 characters long are used: the first 4 characters are
    kept as one field and the remaining 20 are passed to
    ``chunkstring(..., 5)``. The parsed rows are handed to
    ``self.plot_graphics``; a parsing failure is reported in an error box.
    """
    graph_file = QtWidgets.QFileDialog.getOpenFileName()[0]
    if not graph_file:
        return
    print(graph_file)

    # Context manager so the handle is closed right after reading
    with open(graph_file) as log_file:
        file_raw = log_file.read()
    print(file_raw)

    try:
        data = [[x[:4]] + list(chunkstring(x[4:], 5))
                for x in file_raw.split('\n') if len(x) == 24]
    except Exception:
        print('DATA INCORRECT')
        # QMessageBox has no `error` static function; `critical` is the
        # error-level dialog.
        QtWidgets.QMessageBox.critical(self, "Ошибка", "Файл поврежден!")
    else:
        print("GRAFIC data", data)
        self.plot_graphics(data)
def on_file_downloaded(self):
    """Parse a downloaded log file and plot it in the main application.

    When ``pyboard.last_operation`` is truthy, the byte values in
    ``pyboard.recieved_file`` are decoded with ``chr`` and the open button is
    re-enabled. Only lines that are exactly 24 characters long are used: the
    first 4 characters are kept as one field and the remaining 20 are passed
    to ``chunkstring(..., 5)``. On success the rows go to
    ``app.plot_graphics`` and this widget is closed; on a parsing failure, or
    when the operation itself failed, an error box is shown.
    """
    if pyboard.last_operation:
        file_raw = ''.join([chr(x) for x in pyboard.recieved_file])
        print(file_raw)
        self.open_butt.setEnabled(True)
        try:
            data = [[x[:4]] + list(chunkstring(x[4:], 5))
                    for x in file_raw.split('\n') if len(x) == 24]
        except Exception:
            print('DATA INCORRECT')
            # QMessageBox has no `error` static function; `critical` is the
            # error-level dialog.
            QtWidgets.QMessageBox.critical(self, "Ошибка", "Файл поврежден!")
        else:
            print("GRAFIC data", data)
            app.plot_graphics(data)
            self.close()
    else:
        QtWidgets.QMessageBox.critical(self, "Ошибка", "Не удалось получить список файлов")
def translate(text):
    """Translate ``text`` to English chunk by chunk and return the result.

    The subscription key and the endpoint URL are read from the
    'Translation' section of ``config.cnf``. ``text`` is split with
    ``chunkstring(text)`` and every chunk is posted to
    ``<endpoint>/translate?api-version=3.0&to=en``. The first translation of
    each response is collected; a chunk whose response raises ``KeyError``
    when indexed is skipped with a message on stdout.

    NOTE(review): the headers and path look like the Microsoft Translator
    Text API v3 - confirm against the configured endpoint.

    Returns
    -------
    str
        The translated chunks concatenated in order.
    """
    # grab MS credentials:
    config = configparser.ConfigParser()
    config.read("config.cnf")
    subscription_key = config.get('Translation', 'API_key')
    endpoint = config.get('Translation', 'endpoint_url')

    path = '/translate?api-version=3.0'
    params = '&to=en'
    constructed_url = endpoint + path + params

    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }

    translated_chunks = []
    for chunk in chunkstring(text):
        # post request
        request = requests.post(constructed_url, headers=headers, json=[{'text': chunk}])
        response = request.json()
        try:
            # The decoded body is indexed directly: dumping it to JSON and
            # loading it back yields the same structure.
            translated_chunks.append(response[0]['translations'][0]['text'])
        except KeyError:
            # doesn't seem to be critical. A dict-shaped body (presumably an
            # error payload) has no key 0 and ends up here.
            print('skipping these many characters: ', len(chunk))
            print('keyerror, whatever that means.')
            continue
    return ''.join(translated_chunks)