Beispiel #1
0
def write_filtered_sequences_and_stats(sequence_dict, sequence_report_general, filtered_sequence_file,
                                       saveExcludedContigs):
    """
    Write the retained contigs, their tabular report and, optionally, the discarded contigs

    Up to three files are produced, all derived from ``filtered_sequence_file``:
    the fasta with the retained contigs (``filtered_sequence_file`` itself), a
    report named ``assembly_mapping_report.sequences_filtered.<basename>.tab``
    in the same directory and, when ``saveExcludedContigs`` is true, a
    ``<path without extension>.excluded_contigs.fasta`` file. The latter is
    deleted again when no contig was discarded.

    Parameters
    ----------
    sequence_dict : dict
        Records keyed by consecutive integers starting at 1. Each record is
        read through its 'header', 'length', 'sequence' and 'discard' keys
    sequence_report_general : dict
        General statistics; only ['filtered']['contigs'] and
        ['filtered']['bp'] are read
    filtered_sequence_file : str
        Path for the fasta file that receives the retained contigs
    saveExcludedContigs : bool
        Whether the discarded contigs should be saved to their own fasta file

    Returns
    -------
    None
    """
    fields = ['header', 'length']
    path_report = os.path.join(
        os.path.dirname(filtered_sequence_file),
        'assembly_mapping_report.sequences_filtered.' +
        os.path.splitext(os.path.basename(filtered_sequence_file))[0] + '.tab')

    path_excluded_contigs = None
    excluded_contigs = None
    found_excluded_contigs = False
    if saveExcludedContigs:
        path_excluded_contigs = os.path.splitext(filtered_sequence_file)[0] + '.excluded_contigs.fasta'
        excluded_contigs = open(path_excluded_contigs, 'wt')

    # The optional handle cannot live in a plain ``with`` statement, so it is
    # released in ``finally`` to avoid leaking it when any write below fails.
    try:
        with open(path_report, 'wt') as report_filtered:
            with open(filtered_sequence_file, 'wt') as contigs_filtered:
                report_filtered.write('\n'.join(
                    ['#general', '>contigs', str(sequence_report_general['filtered']['contigs']), '>bp',
                     str(sequence_report_general['filtered']['bp'])]) + '\n')
                report_filtered.write('#' + '\t'.join(fields) + '\n')

                for i in range(1, len(sequence_dict) + 1):
                    record = sequence_dict[i]
                    if not record['discard']:
                        report_filtered.write('\t'.join([str(record[f]) for f in fields]) + '\n')
                        contigs_filtered.write('>' + record['header'] + '\n' + '\n'.join(
                            utils.chunkstring(record['sequence'], 80)) + '\n')
                    elif excluded_contigs is not None:
                        found_excluded_contigs = True
                        excluded_contigs.write('>' + record['header'] + '\n' + '\n'.join(
                            utils.chunkstring(record['sequence'], 80)) + '\n')
    finally:
        if excluded_contigs is not None:
            excluded_contigs.close()

    # Do not leave an empty fasta behind when nothing was discarded.
    if excluded_contigs is not None and not found_excluded_contigs:
        os.remove(path_excluded_contigs)
Beispiel #2
0
 def plot_graphics(self, data):
     """Redraw the temperature and pressure graphs from parsed log rows.

     Each row of ``data`` is indexed as ``row[0]`` .. ``row[4]``: ``row[0]``
     is a string that is split into 2-character pieces and joined with ':'
     to build a tick label (presumably a time stamp - confirm against the
     log format), and ``row[1]`` .. ``row[4]`` are converted with ``float``
     and plotted as T1, T2, P1 and P2 against the row index.

     The tick labels are stored in the module-level ``ticks`` variable
     (presumably read by ``tick_func``, which is installed as the x-axis
     formatter - verify). Any failure while parsing or plotting is reported
     to the user as a corrupted log file.
     """
     global ticks
     try:
         ticks = [':'.join(chunkstring(x[0], 2)) for x in data]
         print(data)
         print(ticks)
         int_time = list(range(len(data)))
         temp1 = [float(x[1]) for x in data]
         temp2 = [float(x[2]) for x in data]
         press1 = [float(x[3]) for x in data]
         press2 = [float(x[4]) for x in data]
         for graph in [self.temp_graph, self.press_graph]:
             graph.clear()
             graph.axis.xaxis.set_major_formatter(FuncFormatter(tick_func))
             graph.timelen = len(data)
         self.temp_graph.axis.plot(int_time, temp1, 'C1', label='T1')
         self.temp_graph.axis.plot(int_time, temp2, 'C3', label='T2')
         self.temp_graph.axis.legend()
         self.press_graph.axis.plot(int_time, press1, 'C4', label='P1')
         self.press_graph.axis.plot(int_time, press2, 'C2', label='P2')
         self.press_graph.axis.legend()
         self.temp_graph.canvas.draw()
         self.press_graph.canvas.draw()
     except Exception:
         # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
         # and SystemExit are not mistaken for a corrupted log file.
         QtWidgets.QMessageBox.warning(self.MainWindow, "Ошибка",
                                       "Лог-файл поврежден!")
Beispiel #3
0
def write_filtered_sequences_and_stats(sequence_dict, sequence_report_general,
                                       filtered_sequence_file,
                                       saveExcludedContigs):
    """
    Save the contigs that passed filtering together with a tabular report

    The retained contigs go to ``filtered_sequence_file`` and a report named
    ``assembly_mapping_report.sequences_filtered.<basename>.tab`` is written
    next to it. When ``saveExcludedContigs`` is true the discarded contigs are
    written to ``<path without extension>.excluded_contigs.fasta``; that file
    is removed again if nothing was discarded.

    Parameters
    ----------
    sequence_dict : dict
        Records keyed by consecutive integers starting at 1. Each record is
        read through its 'header', 'length', 'sequence' and 'discard' keys
    sequence_report_general : dict
        General statistics; only ['filtered']['contigs'] and
        ['filtered']['bp'] are read
    filtered_sequence_file : str
        Path for the fasta file that receives the retained contigs
    saveExcludedContigs : bool
        Whether the discarded contigs should be saved to their own fasta file

    Returns
    -------
    None
    """

    def fasta_record(record):
        # One fasta entry: the header line followed by the sequence pieces
        # returned by utils.chunkstring(..., 80), one piece per line.
        return '>' + record['header'] + '\n' + '\n'.join(
            utils.chunkstring(record['sequence'], 80)) + '\n'

    report_name = ('assembly_mapping_report.sequences_filtered.' +
                   os.path.splitext(
                       os.path.basename(filtered_sequence_file))[0] + '.tab')
    report_path = os.path.join(os.path.dirname(filtered_sequence_file),
                               report_name)

    path_excluded_contigs = None
    excluded_contigs = None
    if saveExcludedContigs:
        path_excluded_contigs = os.path.splitext(
            filtered_sequence_file)[0] + '.excluded_contigs.fasta'
        excluded_contigs = open(path_excluded_contigs, 'wt')

    found_excluded_contigs = False
    fields = ['header', 'length']
    try:
        with open(report_path, 'wt') as report_filtered:
            with open(filtered_sequence_file, 'wt') as contigs_filtered:
                report_filtered.write('\n'.join([
                    '#general', '>contigs',
                    str(sequence_report_general['filtered']['contigs']),
                    '>bp',
                    str(sequence_report_general['filtered']['bp'])
                ]) + '\n')
                report_filtered.write('#' + '\t'.join(fields) + '\n')

                for i in range(1, len(sequence_dict) + 1):
                    record = sequence_dict[i]
                    if record['discard']:
                        if excluded_contigs is not None:
                            found_excluded_contigs = True
                            excluded_contigs.write(fasta_record(record))
                        continue
                    report_filtered.write(
                        '\t'.join([str(record[f]) for f in fields]) + '\n')
                    contigs_filtered.write(fasta_record(record))
    finally:
        # Always release the optional handle, even when a write above failed;
        # the original code leaked it in that case.
        if excluded_contigs is not None:
            excluded_contigs.close()

    # An excluded-contigs file without any record is useless: remove it.
    if excluded_contigs is not None and not found_excluded_contigs:
        os.remove(path_excluded_contigs)
Beispiel #4
0
 def on_finish_fetching(self):
     """Populate the file list after the board's file listing was fetched.

     When ``pyboard.last_operation`` is truthy, the bytes in
     ``pyboard.recieved_file`` are decoded with ``chr`` into one string.
     Unless that string equals 'no' (presumably the board's "no files"
     reply - confirm), it is cut into 10-character names, each of which is
     re-joined from 2-character pieces with '.', and the names are added
     to ``self.files_list``. Otherwise an error dialog is shown.
     ``self.fetching_list`` is reset to ``False`` in both cases.
     """
     if pyboard.last_operation:
         self.update_status('Выберите файл')
         files_raw = ''.join([chr(x) for x in pyboard.recieved_file])
         if files_raw != 'no':
             files = [
                 '.'.join(chunkstring(x, 2))
                 for x in chunkstring(files_raw, 10)
             ]
             print(files)
             for filename in files:
                 self.files_list.addItem(filename)
         self.open_butt.setEnabled(True)
     else:
         # QMessageBox has no ``error`` static method (calling it raised
         # AttributeError); ``critical`` is the error-level dialog.
         QtWidgets.QMessageBox.critical(self, "Ошибка",
                                        "Не удалось получить список файлов")
     self.fetching_list = False
Beispiel #5
0
def clean_novel_alleles(novel_alleles, scheme_mlst, profile):
    """
    Clean the fasta file with the novel alleles produced by mlst

    Only the sequences whose header starts with ``scheme_mlst`` and whose gene
    is reported in ``profile`` with an allele starting with '~' are kept. The
    original file is always deleted and it is only written again when at least
    one sequence was kept. A missing or unreadable file is reported on stdout
    instead of raising.

    Parameters
    ----------
    novel_alleles : str
        Path for fasta file containing the novel alleles
    scheme_mlst : str
        MLST schema found by mlst
    profile : list
        List of strings with the profile found, each formatted as
        ``gene(allele)``

    Returns
    -------
    None
    """
    unknown_genes = []
    for gene_allele in profile:
        gene = gene_allele.split('(')[0]
        try:
            allele = gene_allele.split('(')[1].rstrip(')')
        except IndexError as e:
            # Entry without a '(allele)' part
            print('WARNING: {}'.format(e))
        else:
            if allele.startswith('~'):
                unknown_genes.append(gene)

    try:
        novel_alleles_keep = {}
        if unknown_genes:
            # ``with`` guarantees the reader is closed before the file is removed,
            # even if parsing fails
            with open(novel_alleles, mode='rt') as reader:
                # Groups alternate between runs of header lines and runs of
                # sequence lines
                fasta_iter = (g for k, g in itertools_groupby(reader, lambda x: x.startswith('>')))
                for header_lines in fasta_iter:
                    # The next() builtin works on Python 2 and 3, unlike the
                    # ``.next()`` method that only exists on Python 2
                    header = next(header_lines)[1:].rstrip('\r\n')
                    # A trailing header without sequence yields an empty sequence
                    # instead of leaking StopIteration to the caller
                    seq = ''.join(s.rstrip('\r\n') for s in next(fasta_iter, ()))
                    if header.startswith(scheme_mlst):
                        gene = header.split('.')[1].split('~')[0]
                        if gene in unknown_genes:
                            novel_alleles_keep[header] = seq

        os.remove(novel_alleles)

        if novel_alleles_keep:
            with open(novel_alleles, 'wt') as writer:
                for header, seq in novel_alleles_keep.items():
                    writer.write('>{}\n'.format(header))
                    writer.write('\n'.join(utils.chunkstring(seq, 80)) + '\n')
    except OSError as e:  # FileNotFoundError is a subclass of OSError in Python 3
        print('An unknown ST was found but no novel alleles fasta file was produced by mlst software:\n'
              '{}'.format(e))
Beispiel #6
0
 def on_download_existing_finished(self, status):
     """Refresh the recipe list from the listing downloaded from the board.

     The bytes in ``pyboard.recieved_file`` are decoded with ``chr`` into
     one string. Unless it equals 'no', the list widget is cleared and
     refilled with the pieces returned by ``chunkstring(listing, 3)``.
     The open and save buttons are then enabled and the user is notified.
     ``status`` is accepted but not used.
     """
     print("BEGIN PARSING")
     print(pyboard.recieved_file)
     listing = ''.join(map(chr, pyboard.recieved_file))
     print("EDITOR data", listing)
     if listing != 'no':
         recipe_names = chunkstring(listing, 3)
         self.recipes_list.clear()
         for recipe_name in recipe_names:
             self.recipes_list.addItem(recipe_name)
     for button in (self.open_button, self.save_button):
         button.setEnabled(True)
     self.warn('Список существующих рецептов получен!')
Beispiel #7
0
def clean_novel_alleles(novel_alleles, scheme_mlst, profile):
    """
    Clean the fasta file with the novel alleles produced by mlst

    Only the sequences whose header starts with ``scheme_mlst`` and whose gene
    is reported in ``profile`` with an allele starting with '~' are kept. The
    original file is always deleted and it is only written again when at least
    one sequence was kept.

    Parameters
    ----------
    novel_alleles : str
        Path for fasta file containing the novel alleles
    scheme_mlst : str
        MLST schema found by mlst
    profile : list
        List of strings with the profile found, each formatted as
        ``gene(allele)``

    Returns
    -------
    None

    Raises
    ------
    OSError
        If the novel alleles file cannot be read or removed (e.g. it does not
        exist)
    """
    unknown_genes = []
    for gene_allele in profile:
        gene, _, allele_part = gene_allele.partition('(')
        if not allele_part and '(' not in gene_allele:
            # Same warning the failed ``split('(')[1]`` lookup used to produce
            print('WARNING: {}'.format(IndexError('list index out of range')))
            continue
        # Mirror ``split('(')[1]``: only the text up to a second '(' counts
        allele = allele_part.split('(')[0].rstrip(')')
        if allele.startswith('~'):
            unknown_genes.append(gene)

    novel_alleles_keep = {}
    if unknown_genes:
        # ``with`` guarantees the reader is closed before the file is removed,
        # even if parsing fails
        with open(novel_alleles, mode='rt') as reader:
            # Groups alternate between runs of header lines and runs of
            # sequence lines
            fasta_iter = (
                g
                for k, g in itertools_groupby(reader, lambda x: x.startswith('>')))
            for header_lines in fasta_iter:
                # The next() builtin works on Python 2 and 3, unlike the
                # ``.next()`` method that only exists on Python 2
                header = next(header_lines)[1:].rstrip('\r\n')
                # A trailing header without sequence yields an empty sequence
                # instead of leaking StopIteration to the caller
                seq = ''.join(s.rstrip('\r\n') for s in next(fasta_iter, ()))
                if header.startswith(scheme_mlst):
                    gene = header.split('.')[1].split('~')[0]
                    if gene in unknown_genes:
                        novel_alleles_keep[header] = seq

    os.remove(novel_alleles)

    if novel_alleles_keep:
        with open(novel_alleles, 'wt') as writer:
            for header, seq in novel_alleles_keep.items():
                writer.write('>{}\n'.format(header))
                writer.write('\n'.join(utils.chunkstring(seq, 80)) + '\n')
Beispiel #8
0
 def on_build_graph(self):
     """Let the user pick a log file, parse it and plot its content.

     Only lines that are exactly 24 characters long are used: the first 4
     characters form the first field and the remaining 20 are split with
     ``chunkstring(..., 5)`` (presumably into four 5-character values -
     confirm against the helper). The parsed rows are handed to
     ``self.plot_graphics``. Nothing happens when the dialog is cancelled.
     """
     graph_file = QtWidgets.QFileDialog.getOpenFileName()[0]
     if not graph_file:
         return
     print(graph_file)
     # Close the file deterministically instead of relying on garbage collection.
     with open(graph_file) as log_file:
         file_raw = log_file.read()
     print(file_raw)
     try:
         data = [[x[:4]] + list(chunkstring(x[4:], 5))
                 for x in file_raw.split('\n') if len(x) == 24]
     except Exception:
         print('DATA INCORRECT')
         # QMessageBox has no ``error`` static method (calling it raised
         # AttributeError); ``critical`` is the error-level dialog.
         QtWidgets.QMessageBox.critical(self, "Ошибка", "Файл поврежден!")
     else:
         print("GRAFIC data", data)
         self.plot_graphics(data)
Beispiel #9
0
 def on_file_downloaded(self):
     """Parse a log file downloaded from the board and plot it.

     When ``pyboard.last_operation`` is truthy, the bytes in
     ``pyboard.recieved_file`` are decoded with ``chr`` and every line that
     is exactly 24 characters long is split into its first 4 characters
     plus the pieces returned by ``chunkstring(rest, 5)``. The rows are
     passed to ``app.plot_graphics`` and this window is closed. Parsing
     failures and a failed download are reported with an error dialog.
     """
     if pyboard.last_operation:
         file_raw = ''.join([chr(x) for x in pyboard.recieved_file])
         print(file_raw)
         self.open_butt.setEnabled(True)
         try:
             data = [[x[:4]] + list(chunkstring(x[4:], 5))
                     for x in file_raw.split('\n') if len(x) == 24]
         except Exception:
             print('DATA INCORRECT')
             # QMessageBox has no ``error`` static method (calling it raised
             # AttributeError); ``critical`` is the error-level dialog.
             QtWidgets.QMessageBox.critical(self, "Ошибка", "Файл поврежден!")
         else:
             print("GRAFIC data", data)
             app.plot_graphics(data)
             self.close()
     else:
         QtWidgets.QMessageBox.critical(self, "Ошибка",
                                        "Не удалось получить список файлов")
def translate(text):
    """Translate *text* to English, chunk by chunk, through a REST translation API.

    The subscription key and endpoint URL are read from the ``Translation``
    section of ``config.cnf`` (options ``API_key`` and ``endpoint_url``).
    ``text`` is split with ``chunkstring(text)`` and each chunk is posted to
    ``<endpoint>/translate?api-version=3.0&to=en``; the first translation of
    each response is appended to the result.

    Chunks whose response does not have the expected layout (a ``KeyError``,
    presumably an error payload returned by the service) are skipped with a
    message on stdout, so the returned text may be incomplete.

    Returns the concatenated translated text as a string.
    """
    # grab MS credentials:
    config = configparser.ConfigParser()
    config.read("config.cnf")
    subscription_key = config.get('Translation', 'API_key')
    endpoint = config.get('Translation', 'endpoint_url')

    path = '/translate?api-version=3.0'
    params = '&to=en'
    constructed_url = endpoint + path + params

    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }

    # Collect the pieces and join once instead of growing a string in the loop.
    translated_chunks = []
    for chunk in chunkstring(text):
        # post request
        response = requests.post(constructed_url,
                                 headers=headers,
                                 json=[{'text': chunk}])
        payload = response.json()
        try:
            # ``payload`` is already parsed JSON; dumping and re-loading it
            # (as the code used to do) returned the very same structure.
            translated_chunks.append(payload[0]['translations'][0]['text'])
        except KeyError:  # doesn't seem to be critical.
            print('skipping these many characters: ', len(chunk))
            print('keyerror, whatever that means.')
            continue
    return ''.join(translated_chunks)