Beispiel #1
0
def novoplasty_run(project, step_data, params):
    """Write a NOVOPlasty config file into a new assembly step and run the tool.

    The config text is rendered from the module-level templates, using the
    sequence reads description in ``params.sequence_reads`` and the other
    values in ``params``. NOVOPlasty is then executed inside the step
    directory with its stdout discarded. Returns the saved step.
    """
    step = AssemblyStep(project, step_data, remove_data=True)

    # Render one dataset section per described read set, numbered from 1
    reads = SequenceReads.from_file(params.sequence_reads, relative_dir='..')
    dataset_sections = []
    for index, (read_type, read) in enumerate(reads, start=1):
        # 'SE' reads keep their path under 'file', all others under 'file_1'
        first_file_key = 'file' if read_type == 'SE' else 'file_1'
        section = _dataset_data.format(
            num=index,
            read_length=read.get('read_length', ''),
            insert_size=read.get('insert_length', ''),
            platform=read.get('platform', '').lower() or 'illumina',  # illumina or ion
            read_1=read.get(first_file_key),
            read_2=read.get('file_2', ''),
            single_paired=read_type)
        dataset_sections.append(section)

    # First letter of the organelle type (lower-cased) keys the lookup tables
    organelle_key = params.organelle_type[0].lower()
    default_range = _organele_range[organelle_key]
    config_text = _config_data.format(
        project_name=params.project_name or step_data['step_name'],
        organelle_type=_organele_types[organelle_key],
        # Explicit range limits win over the per-organelle defaults
        genome_range_from=params.genome_range_from or default_range[0],
        genome_range_to=params.genome_range_to or default_range[1],
        k_mer=params.k_mer,
        seed=os.path.join('..', params.seed),
        datasets='\n\n'.join(dataset_sections),
    )
    write_str_in_file(step.step_file(_config_file), config_text)

    # Run NOVOPlasty
    # ToDo: make this more general: run here or on a server, locate the executable, ...
    command = ['NOVOPlasty', '-c', _config_file]
    print(f"Command: cd {step.directory}; NOVOPlasty -c {_config_file} > /dev/null")
    subprocess.run(command, cwd=step.directory, stdout=subprocess.DEVNULL)

    step.save()
    return step
Beispiel #2
0
 def get_consensus_file(self):
     """Return the step path of 'consensus.newick', creating the file if missing.

     When the newick file does not exist yet, 'result.con.tre' is read,
     its '[&...]' comments are stripped, the cleaned text is stored as
     'consensus.nexus', and that file is converted to newick.
     """
     newick_path = self.step_file('consensus.newick')
     if os.path.isfile(newick_path):
         return newick_path

     # Note: Bio.Phylo can't handle a nexus file with more than one comment
     # in a value part, so the problematic comments are removed first.
     nexus_path = self.step_file('consensus.nexus')
     raw_tree = read_file_as_str(self.step_file('result.con.tre'))
     cleaned_tree = re.sub(r'\[&[^\]]*]', '', raw_tree)
     write_str_in_file(nexus_path, cleaned_tree)
     import_bio_phylo().convert(nexus_path, 'nexus', newick_path, 'newick')
     return newick_path
Beispiel #3
0
def fetch_genome_assemblies(project, step_data):
    """Create a fresh table step, save it, and write INSTRUCTIONS.txt into it.

    Returns the created step.
    """
    table_step = TableStep(project, step_data, remove_data=True)
    table_step.save()  # save() takes care of the completed status

    # Leave instructions for the user inside the step directory
    instructions_path = table_step.step_file('INSTRUCTIONS.txt')
    instructions_text = _instructions.format(step_name=table_step.directory)
    write_str_in_file(instructions_path, instructions_text)

    return table_step
Beispiel #4
0
def create_ogdraw(step_data,
                  image_format,
                  annotations_step,
                  common_db,
                  sequences=None):
    """Create an images step for OGDraw images of annotated sequences.

    ``sequences`` is an optional ';'-separated string of sequence
    identifiers; when it is empty, all sequences of ``annotations_step``
    are used. For the 'jpg' format, images found in the annotation step's
    'job-results-*.zip' archives are extracted directly. For whatever is
    still left to fetch, GenBank files (at most 30 sequences each),
    INSTRUCTIONS.txt and finish.yml are written. The step is saved as
    completed only when nothing is left to fetch. Returns the step.
    """
    step = ImagesStep(annotations_step.project, step_data, remove_data=True)
    if sequences:
        requested = sequences.split(';')
    else:
        requested = annotations_step.all_sequences()
    all_images = sorted(requested)

    # Fetch common db sequences
    to_fetch = step.get_common_db_records(common_db, all_images, info=True)

    # If OGDraw is done on GeSeq data, then jpg images are already in
    if image_format == 'jpg':
        # Extract jpg files job-results-<num>/GeSeqJob-<num>-<num>_<seq_ident>_OGDRAW.jpg
        zip_names = annotations_step.step_files(matches='^job-results-[0-9]*.zip')
        for zip_name in zip_names:
            with ZipFile(annotations_step.step_file(zip_name), 'r') as archive:
                for member in archive.infolist():
                    found = _re_zip_jpg.search(member.filename)
                    if not found:
                        continue
                    seq_ident = found.group(1)
                    if seq_ident not in to_fetch:
                        continue
                    # Image is available locally, no need to fetch it
                    to_fetch.remove(seq_ident)
                    target = step.step_file(seq_ident + '.jpg')
                    extract_from_zip(archive, member.filename, target)

    # Store sequences
    if to_fetch:
        # Note: it is important that file has extension gbff (multiple sequence data)
        batches = {}
        for batch_num, batch in enumerate(split_list(to_fetch, 30), start=1):
            gbff_path = step.step_file(f'sequences_{batch_num}.gbff')
            annotations_step.concatenate_seqs_genbank(gbff_path, batch)
            batches[batch_num] = batch

        # Store instructions
        instructions_text = _instructions.format(
            step_name=step_data['step_name'], image_format=image_format)
        write_str_in_file(step.step_file('INSTRUCTIONS.txt'), instructions_text)

        # Store image format used and which sequences went into which file
        finish_data = dict(image_format=image_format, sequences=batches)
        write_yaml(finish_data, step.step_file('finish.yml'))

    step.set_images(all_images)
    step.save(completed=not to_fetch)
    return step
Beispiel #5
0
def fetch_sequences(step_data, table_step, common_db, column_name=None):
    """Create a sequences step holding the sequences referenced by a table step.

    Sequence identifiers are taken from the 'seq_ident' typed column(s) of
    ``table_step`` (optionally restricted by ``column_name``) and handed to
    ``do_fetch_sequences``. The step is saved as completed only when nothing
    is left to fetch; otherwise INSTRUCTIONS.txt listing the remaining
    identifiers is written. Returns the step.
    """
    step = SequencesStep(table_step.project, step_data, remove_data=True)
    table_step.propagate_step_name_prefix(step)

    seq_idents = table_step.get_column_values_by_type('seq_ident',
                                                      column_name=column_name)
    to_fetch = do_fetch_sequences(step, seq_idents, common_db)

    # ToDo: remove not referenced sequences

    # Store step data
    step.save(completed=not to_fetch)
    if to_fetch:
        # The original code referenced `sequence_db`, which is not defined in
        # this function; `common_db` is the database argument actually used.
        # NOTE(review): confirm that common_db renders sensibly in the text.
        write_str_in_file(
            step.step_file('INSTRUCTIONS.txt'),
            _instructions_no_data.format(sequence_db=common_db,
                                         seqs=', '.join(sorted(to_fetch))))
    return step
Beispiel #6
0
def create_ge_seq_data(step_data, sequences_step, common_db, num_sequences_in_file):
    """Create an annotations step for the sequences of ``sequences_step``.

    For whatever is still left to fetch after consulting ``common_db``,
    FASTA files with at most ``num_sequences_in_file`` sequences each and
    INSTRUCTIONS.txt are written. The step is saved as completed only when
    nothing is left to fetch. Returns the step.
    """
    annotations = AnnotationsStep(sequences_step.project, step_data, remove_data=True)
    sequences_step.propagate_step_name_prefix(annotations)
    seq_idents = list(sequences_step.all_sequences())

    # Fetch common DB sequences
    missing = annotations.get_common_db_records(common_db, seq_idents, info=True)

    if missing:
        # Store the remaining sequences in numbered FASTA files (from 1)
        batches = split_list(missing, num_sequences_in_file)
        for file_num, batch in enumerate(batches, start=1):
            fasta_path = annotations.step_file(f'sequences_{file_num}.fa')
            sequences_step.concatenate_seqs_fa(fasta_path, batch)

        # Store instructions
        instructions_text = _instructions.format(step_name=step_data['step_name'])
        write_str_in_file(annotations.step_file('INSTRUCTIONS.txt'), instructions_text)

    annotations.set_sequences(seq_idents)
    annotations.save(completed=not missing)
    return annotations
Beispiel #7
0
def create_permutations(project,
                        step_data,
                        raw_file,
                        permutations,
                        num_traits=None,
                        run=False):
    """Create a QTL Cartographer permutation step with one directory per trait.

    The MapMaker raw/map files and the QTL Cartographer files ('tmp.00m',
    'tmp.00c', 'tmp.00r') have to exist next to ``raw_file``. The tmp files
    are copied into the step, a 'qtlcart.rc' is written into each trait
    directory and 'finish.yml' records the permutations and trait
    directories. With ``run`` set the module script is executed directly,
    otherwise run instructions are prepared for the collected files.

    Raises:
        ZCItoolsValueError: an input file is missing, or ``num_traits`` is
            not a positive number.

    Returns the created step.
    """
    # Check input files
    map_file = raw_file.replace('.raw', '.map')
    data_dir = os.path.dirname(raw_file)
    tmp_files = ('tmp.00m', 'tmp.00c', 'tmp.00r')
    for mf in (raw_file, map_file):
        if not os.path.isfile(mf):
            raise ZCItoolsValueError(
                f"Input MapMaker file {mf} doesn't exist!")
    for qf in tmp_files:
        if not os.path.isfile(os.path.join(data_dir, qf)):
            raise ZCItoolsValueError(
                f"Input Windows QTL Cartographer file {qf} doesn't exist!")

    # Validate before the step is created: creating it with remove_data=True
    # and copying files must not happen for input that is going to be
    # rejected anyway. An explicit raise also survives `python -O`.
    # ToDo: find max traits and fix it/set default
    if not num_traits or num_traits <= 0:
        raise ZCItoolsValueError(
            f"Number of traits has to be a positive number, got {num_traits}!")

    step = QTLCartStep(project, step_data, remove_data=True)
    step.set_data(num_traits, permutations)

    # Copy input files
    files_to_zip = []
    for qf in tmp_files:
        target = step.step_file(qf)
        files_to_zip.append(target)
        copy_file(os.path.join(data_dir, qf), target)

    # Create trait directories, each with its own qtlcart.rc
    trait_dirs = []
    for t_idx in range(1, num_traits + 1):
        trait_dir = step.trait_dir(t_idx)
        trait_dirs.append(trait_dir)
        t_dir = step.step_file(trait_dir)
        ensure_directory(t_dir)
        rc_file = os.path.join(t_dir, 'qtlcart.rc')
        files_to_zip.append(rc_file)
        write_str_in_file(
            rc_file,
            _qtlcart_rc.format(trait=t_idx, num_traits=num_traits))

    finish_file = step.step_file('finish.yml')
    files_to_zip.append(finish_file)
    write_yaml(dict(permutations=permutations, trait_dirs=trait_dirs),
               finish_file)

    # Stores description.yml
    step.save(completed=run)

    # Run or set instructions
    if run:
        run_module_script(run_qtl_cart_perm, step)
    else:
        set_run_instructions(run_qtl_cart_perm, step, files_to_zip,
                             _instructions)
    return step
Beispiel #8
0
    def cmd_summary(self):
        """Print the summary text and store it in 'workflow_summary.txt'.

        Nothing is printed or written when the summary has no (or empty) 'text'.
        """
        text = self.get_summary().get('text')
        if not text:
            return
        print(text)
        write_str_in_file('workflow_summary.txt', text)
Beispiel #9
0
def create_circos_correlation(project, step_data, params):
    """Render a correlation matrix as a Circos image inside a new images step.

    Reads the matrix from ``params.input_filename``, writes the Circos input
    files (data/karyotype.txt, data/tiles.txt, data/links.txt and
    etc/circos.conf) into the step directory, runs ``circos`` there and,
    when ``params.show_image`` is set and an 'image_viewer' setting exists,
    opens 'circos.png' with that viewer.

    Raises:
        ZCItoolsValueError: no correlation data was read, or the matrix has
            fewer than two columns.

    NOTE(review): no step.save() or return statement is visible in this
    view of the function - confirm whether the definition continues.
    """
    # Read correlation data
    cm = None
    if params.input_filename:
        cm = CorrelationMatrix.from_file(params.input_filename)

    if not cm:
        raise ZCItoolsValueError('No correlation input data!')
    num_c = cm.num_columns()
    if num_c < 2:
        raise ZCItoolsValueError('Not much of a matrix!')

    step = ImagesStep(project, step_data, remove_data=True)
    one_width = params.one_width
    gap_correlations = params.gap_correlations
    ow_2 = one_width // 2  # half of one slot, used to find a slot's centre
    one_plus_gap = one_width + gap_correlations  # distance between slot starts

    # Note: column lowercase names are used as column identifiers
    data_dir = step.step_file('data')
    etc_dir = step.step_file('etc')
    ensure_directory(data_dir)
    ensure_directory(etc_dir)

    # Every column starts out green; 'plus_' and 'minus_' are the link colors
    # for non-negative and negative correlations (see the links section).
    colors = dict(
        (lc, 'green') for lc in cm._columns_lower)  # ToDo: some defaults
    colors['plus_'] = 'blue'
    colors['minus_'] = 'red'
    # Each group_color entry is '<column>,<color>', split on the first comma.
    # Presumably cm.check_column() returns the column identifier for a known
    # column and something falsy otherwise - verify against CorrelationMatrix.
    for col_def in params.group_color:
        col_fields = col_def.split(',', 1)
        if len(col_fields) == 2 and cm.check_column(col_fields[0]):
            colors[cm.check_column(col_fields[0])] = col_fields[1]
        else:
            print(f"Warning: '{col_def}' is not column color definition!")

    # data directory
    # karyotype.txt: defines groups (as chromosomes)
    # chr - <name> <label> <start> <end> <color>
    # ...
    # A group holds one slot of one_width for each of the other (num_c - 1)
    # columns, with a gap between neighbouring slots.
    gl = (num_c - 1) * one_width + (num_c -
                                    2) * gap_correlations  # group length
    write_str_in_file(
        os.path.join(data_dir, 'karyotype.txt'),
        '\n'.join(f"chr - {lc} {c} 0 {gl} color_{lc}"
                  for lc, c in zip(cm._columns_lower, cm._columns)))

    # tiles.txt: defines abs(correlation) == 1 interval, as tiles
    # <name> <start> <end> [options]
    with open(os.path.join(data_dir, 'tiles.txt'), 'w') as out:
        for idx1, c1 in enumerate(cm._columns_lower):
            for idx2, c2 in enumerate(cm._columns_lower):
                if idx1 == idx2:
                    continue
                # Slot index (0 .. num_c - 2) of column c2 inside group c1
                pos = (idx1 - idx2 - 1) if idx1 > idx2 else (idx1 - idx2 +
                                                             (num_c - 1))
                start = pos * one_plus_gap
                out.write(
                    f"{c1} {start} {start + one_width} fill_color=color_{c2}\n"
                )

    # links.txt: defines correlations as links
    # <cell_idx> <group_1> <start_1> <end_1> color=color_{plus|minus}_,dist={int}
    # <cell_idx> <group_2> <start_2> <end_2> color=color_{plus|minus}_,dist={int}
    # ...
    with open(os.path.join(data_dir, 'links.txt'), 'w') as out:
        cell_idx = 0
        for idx1, c1 in enumerate(cm._columns_lower):
            # Only columns after c1, so every pair is written once
            rest_c = cm._columns_lower[idx1 + 1:]
            for idx2, c2 in enumerate(rest_c):
                corr = cm.get(c1, c2)
                if corr is not None:
                    # Link width is proportional to abs(correlation); it is
                    # split into two halves around the slot centre.
                    w = round(abs(corr) * one_width)
                    w_1 = w // 2
                    w_2 = w - w_1  # - 1?
                    centar = ow_2 + idx2 * one_plus_gap  # centre of slot idx2
                    color = 'plus_' if corr >= 0 else 'minus_'
                    # Shorter of the two index distances between the columns
                    # when the column list is treated as circular
                    dist = min(idx2 + 1, idx1 + (len(rest_c) - idx2))
                    atts = f"color=color_{color},dist={dist}"
                    # On c1 the interval is measured back from the group end
                    out.write(
                        f"cell_{cell_idx} {c1} {gl - centar - w_2} {gl - centar + w_1} {atts}\n"
                    )
                    out.write(
                        f"cell_{cell_idx} {c2} {centar - w_1} {centar + w_2} {atts}\n"
                    )
                    cell_idx += 1

    # etc directory
    write_str_in_file(
        os.path.join(etc_dir, 'circos.conf'),
        _circos_conf.format(colors='\n'.join(f"color_{lc} = {c}"
                                             for lc, c in colors.items())))

    # Run circos from the step directory, so the relative config path resolves
    subprocess.run(['circos', '-conf', 'etc/circos.conf'], cwd=step.directory)

    # View it
    if params.show_image:
        image_viewer = get_settings().get('image_viewer')
        if image_viewer:
            # Popen does not wait for the viewer to exit
            subprocess.Popen([image_viewer, step.step_file('circos.png')])