Example #1
0
def finish_raxml_data(step_obj):
    """Validate the step's ``output.zip`` and unpack it into the step directory.

    Every zip member must sit in a directory that is listed (as the directory
    part of a ``filename`` entry) in the step's ``finish.yml``, and its base
    name must either match ``_re_raxml_output`` or be ``run_info.txt``.
    After unpacking, the step data is re-checked and the step is saved.

    Raises:
        ZCItoolsValueError: if ``output.zip`` is missing, or a zip member is
            in an unexpected directory or has an unexpected name.
    """
    zip_path = step_obj.step_file('output.zip')
    if not os.path.isfile(zip_path):
        raise ZCItoolsValueError('No calculation output file output.zip!')

    # Directories of the files that were submitted for processing
    finish_entries = read_yaml(step_obj.step_file('finish.yml'))
    known_dirs = {os.path.dirname(entry['filename']) for entry in finish_entries}

    for member in list_zip_files(zip_path):
        # ZipFile uses '/' as separator; a member without '/' has no
        # directory parts, which joins to ''
        *dir_parts, base_name = member.split('/')
        member_dir = os.sep.join(dir_parts)
        if member_dir not in known_dirs:
            raise ZCItoolsValueError(
                f'Output contains file(s) in not step directory ({member_dir})!')

        is_expected = (_re_raxml_output.search(base_name)
                       or base_name == 'run_info.txt')
        if not is_expected:
            raise ZCItoolsValueError(
                f'Not RAxML output file(s)found in the output ({base_name})!')

    # Everything looks fine, unpack next to the input files
    unzip_file(zip_path, step_obj.directory)

    step_obj._check_data()
    step_obj.save(create=False)
Example #2
0
 def finish(self, step_obj):
     """Finish the step by handing the expected result files to
     ``finish_alignment_data``.

     The expected files are the ``filename`` entries of the step's
     ``finish.yml`` with ``sequences.fa`` replaced by ``alignment.phy``,
     plus ``run_info.txt``.
     """
     from .common_methods import finish_alignment_data

     # Only these files are expected in the zip, nothing else
     finish_entries = read_yaml(step_obj.step_file('finish.yml'))
     expected_files = {'run_info.txt'}
     expected_files.update(
         entry['filename'].replace('sequences.fa', 'alignment.phy')
         for entry in finish_entries)
     finish_alignment_data(step_obj, expected_files)
Example #3
0
def finish_mr_bayes_data(step_obj):
    """Validate the step's ``output.zip`` and unpack it into the step directory.

    Every zip member must sit in a directory that is listed (as the directory
    part of a ``filename`` entry) in the step's ``finish.yml``, and its base
    name must be ``_RESULT_PREFIX`` plus one of the known result extensions,
    or ``run_info.txt``. After unpacking, the step data is re-checked and the
    step is saved.

    Raises:
        ZCItoolsValueError: if ``output.zip`` is missing, or a zip member is
            in an unexpected directory or has an unexpected name.
    """
    zip_path = step_obj.step_file('output.zip')
    if not os.path.isfile(zip_path):
        raise ZCItoolsValueError('No calculation output file output.zip!')

    result_extensions = ('.ckp', '.con.tre', '.parts', '.run1.p', '.run1.t',
                         '.run2.p', '.run2.t', '.tstat', '.vstat')
    accepted_names = {_RESULT_PREFIX + ext for ext in result_extensions}
    accepted_names.add('run_info.txt')

    # Directories of the files that were submitted for processing
    finish_entries = read_yaml(step_obj.step_file('finish.yml'))
    known_dirs = {os.path.dirname(entry['filename']) for entry in finish_entries}

    for member in list_zip_files(zip_path):
        # ZipFile uses '/' as separator; a member without '/' has no
        # directory parts, which joins to ''
        *dir_parts, base_name = member.split('/')
        member_dir = os.sep.join(dir_parts)
        if member_dir not in known_dirs:
            raise ZCItoolsValueError(
                f'Output contains file(s) in not step directory ({member_dir})!')

        if base_name not in accepted_names:
            raise ZCItoolsValueError(
                f'Not MrBayes output file(s)found in the output ({base_name})!'
            )

    # Everything looks fine, unpack next to the input files
    unzip_file(zip_path, step_obj.directory)

    step_obj._check_data()
    step_obj.save(create=False)
Example #4
0
 def from_file(filename, relative_dir=None):
     """Build a ``SequenceReads`` object from a YAML file.

     The directory part of ``filename`` (prefixed with ``relative_dir`` when
     given) is passed to ``add_relative_path`` if it is not empty.
     """
     reads = SequenceReads(data=read_yaml(filename))
     file_dir = os.path.dirname(filename)

     if relative_dir and file_dir:
         base_dir = os.path.join(relative_dir, file_dir)
     elif relative_dir:
         base_dir = relative_dir
     else:
         base_dir = file_dir

     if base_dir:
         reads.add_relative_path(base_dir)
     return reads
Example #5
0
    def read_step(self,
                  step_name,
                  check_data_type=None,
                  update_mode=False,
                  no_check=False,
                  outside_of_project=False):
        """Read a step's ``description.yml`` and instantiate its step class.

        Args:
            step_name: step directory as a string, or a list/tuple of path
                parts that are joined together.
            check_data_type: optional data type (string) or collection of
                data types the step has to be of.
            update_mode: forwarded to the step class.
            no_check: forwarded to the step class.
            outside_of_project: when set and ``step_name`` is a list/tuple,
                ``step_name`` is also forwarded as ``step_directory``.

        Raises:
            ZCItoolsValueError: if no description data was read, the data
                type does not satisfy ``check_data_type``, or no step class
                is registered in ``self.steps_map`` for the data type.
        """
        if isinstance(step_name, str):
            path_parts = (step_name,)
        else:
            assert isinstance(step_name, (list, tuple)), type(step_name)
            path_parts = step_name
        desc_data = read_yaml(os.path.join(*path_parts, 'description.yml'))
        if not desc_data:
            raise ZCItoolsValueError(f"'{step_name}' is not a step!")

        data_type = desc_data['data_type']

        if check_data_type:
            if isinstance(check_data_type, str):
                if check_data_type != data_type:
                    raise ZCItoolsValueError(
                        f"Step {step_name} is not of data type '{check_data_type}'!"
                    )
            elif data_type not in check_data_type:
                raise ZCItoolsValueError(
                    f"Step {step_name} is not of data types: {', '.join(check_data_type)}!"
                )

        step_cls = self.steps_map.get(data_type)
        if not step_cls:
            raise ZCItoolsValueError(
                f"No step class for data type {data_type}!")

        step_kwargs = dict(update_mode=update_mode, no_check=no_check)
        if outside_of_project and isinstance(step_name, (list, tuple)):
            # Step directory is given explicitly only for path-parts names
            step_kwargs['step_directory'] = step_name
        return step_cls(self, desc_data['project'], **step_kwargs)
Example #6
0
def finish_ogdraw(step_obj, common_db):
    """Extract OGDraw result images from downloaded zip files into the step.

    Looks for ``ogdraw-result-*.zip`` files among the step's files, maps each
    zipped ``...-outfile.<image_format>`` member back to the sequence ident
    recorded in the step's ``finish.yml`` and extracts it as
    ``<seq_ident>.<image_format>`` into the step. Afterwards the step data is
    re-checked and the step is saved.

    Args:
        step_obj: step object providing ``step_files``, ``step_file``,
            ``_check_data`` and ``save``.
        common_db: optional object with ``set_record(ident, filename)``; when
            truthy, every extracted image is registered in it.

    Returns:
        None. When no result zip file is found, a warning is printed and
        nothing else is done.
    """
    # Note: original files are left in directory

    # Check files ogdraw-result-<num>-<hash>.zip
    # NOTE(review): the '.' before 'zip' is unescaped and the pattern has no
    # end anchor; left unchanged since matching is done by step_files().
    zip_files = step_obj.step_files(matches='^ogdraw-result-[0-9]+-.*.zip')
    if not zip_files:
        print(
            "Warning: can't find any OGDraw output file (ogdraw-result-*.zip)!"
        )
        return

    # Collect sequence idents submitted.
    # The step is available only as the step_obj parameter; no other step
    # name exists in this function.
    d = read_yaml(step_obj.step_file('finish.yml'))
    image_format = d['image_format']

    seq_ident_map = dict()  # (sequence file idx, line idx) -> seq_ident
    for seq_idx, sequences in d['sequences'].items():
        # Note: line idx starts from 1, since files in zip has that numbering
        seq_ident_map.update(
            ((seq_idx, line_idx), seq_ident)
            for line_idx, seq_ident in enumerate(sequences, start=1))

    # extract ogdraw-result-<num>-<hash>/sequences_<num>ff_<num>/ogdraw_job_<hash>-outfile.<image_format>
    # Zip subdirectory naming depends on naming of OGDraw input files (sequences_<num>.gbff)
    f_end = f'-outfile.{image_format}'
    added_images = []
    for filename in zip_files:
        with ZipFile(step_obj.step_file(filename), 'r') as zip_f:
            for z_i in zip_f.infolist():
                if not z_i.filename.endswith(f_end):
                    continue
                # Find sequence id of that file: the first two numbers after
                # 'sequences_' are the input file index and the line index
                rest = z_i.filename.split('sequences_')[1]
                nums = re.findall(r'\d+', rest)
                file_idx = int(nums[0])
                line_idx = int(nums[1])
                seq_ident = seq_ident_map[(file_idx, line_idx)]
                #
                added_images.append(seq_ident)
                extract_from_zip(
                    zip_f, z_i.filename,
                    step_obj.step_file(f'{seq_ident}.{image_format}'))

    step_obj._check_data()
    step_obj.save(create=False)

    # Set into the common db
    if common_db:
        for image_ident in added_images:
            common_db.set_record(
                image_ident,
                step_obj.step_file(f'{image_ident}.{image_format}'))
Example #7
0
    def _run_command(self, command, args, cmd_args=None):
        """Instantiate the command registered under ``command`` and run it.

        Commands without a command type are simply run. Commands of type
        ``new_step`` / ``new_steps`` are run with a step-data dict and, when
        they return something truthy, the executed command line is appended
        to ``project_log.yml`` (unless equal to the last logged entry). Any
        other command type only prints a warning.

        Args:
            command: key into ``self.commands_map``.
            args: parsed arguments; stored on ``self._args`` and passed to
                the command constructor.
            cmd_args: optional list of command line parts used for the
                logged ``cmd`` string; ``sys.argv[1:]`` is used when falsy.
        """
        self._args = args  # Store commands args
        command_obj = self.commands_map[command](self, args)
        command_type = command_obj.get_command_type()

        # General work
        if not command_type:
            if command_obj._PROJECT_COMMAND and not self._check_is_project_valid():
                return
            command_obj.run()
            return

        if command_type not in ('new_step', 'new_steps'):
            print(f"Warning: not supported command_type {command_type}?!")
            return

        # Create new step(s)
        if not self._check_is_project_valid():
            return

        # Run command
        not_stored = ('command', 'step_num', 'step_description')
        command_args = {
            key: value
            for key, value in vars(args).items() if key not in not_stored
        }
        db_id = command_obj.common_db_identifier()
        step_data = {
            'prev_steps': command_obj.prev_steps(),
            'common_db_identifier': list(db_id) if db_id else None,
            'command': command,
            'command_args': command_args,
            'cmd': ' '.join(cmd_args or sys.argv[1:]),
        }

        if command_type == 'new_step':
            step_data['step_name'] = self.new_step_name(command_obj, args)
            result = command_obj.run(step_data)
            if not result:
                print(
                    "Warning: create step command didn't return step object!"
                )
            elif not result.is_completed():
                print(
                    f'Step is not finished, check instruction ({result.directory}/INSTRUCTIONS.txt)!'
                )
        else:
            result = command_obj.run(step_data)
            if result is None:
                print(
                    "Warning: create steps command didn't return any step object!"
                )
            else:
                for created in result:
                    if not created.is_completed():
                        print(
                            f'Step is not finished, check instruction ({created.directory}/INSTRUCTIONS.txt)!'
                        )

        if not result:
            return

        # Store log data into project_log.yml
        log_entry = {
            key: value
            for key, value in step_data.items() if key in ('cmd', 'step_name')
        }
        # Do not store if the entry is equal to the one from the last command
        log = read_yaml('project_log.yml')
        if not log or log[-1] != log_entry:
            write_yaml([log_entry], 'project_log.yml',
                       mode='a')  # Appends yml list
Example #8
0
 def get_description(self):
     """Return the content of the step's ``description.yml`` as read by
     ``read_yaml``."""
     description_path = self.step_file('description.yml')
     return read_yaml(description_path)
Example #9
0
        d = self.get_description()
        if d:
            return d['data'].get(attr, default)
        return default

    # Summary data
    def save_summary_data(self, data):
        """Write ``data`` (has to be a dict) into the step's ``summary.yml``."""
        assert isinstance(data, dict), data
        summary_path = self.step_file('summary.yml')
        write_yaml(data, summary_path)

    def make_summary_data(self):
        """Return summary data for the step; this implementation returns None."""
        return None

    def get_summary_data(self):
        """Return the step's summary data, or None when there is none.

        An existing ``summary.yml`` is read and returned. Otherwise, for a
        completed step, ``make_summary_data()`` is called and a truthy result
        is saved with ``save_summary_data()`` and returned.
        """
        summary_path = self.step_file('summary.yml')
        if os.path.isfile(summary_path):
            return read_yaml(summary_path)

        # 'Cached' version: create the summary once and store it
        if not self.is_completed():
            return None
        summary = self.make_summary_data()
        if not summary:
            return None
        self.save_summary_data(summary)
        return summary

    #
    def get_finish_data(self):
        """Return the content of the step's ``finish.yml``, or None when that
        file does not exist."""
        finish_path = self.step_file('finish.yml')
        if not os.path.isfile(finish_path):
            return None
        return read_yaml(finish_path)

    # Substep methods
    def get_substep_step_data(self, step_name):
        """Return the step-data dict for a substep; it holds only ``step_name``."""
        # Not set here: prev_steps, command, command_args, cmd
        return {'step_name': step_name}