コード例 #1
0
def mob_suite(redmine_instance, issue, work_dir, description):
    """
    Run mob_recon (with --run_typer) on the requested assemblies and report the outcome on the Redmine issue.

    FASTA files for the requested SEQIDs are copied into ``work_dir/fastas``, mob_recon is run on each of them
    through docker, the results folder is zipped and uploaded to the FTP, and the issue is updated with either
    the download link or an upload-failure message. Any exception raised during processing is reported on the
    issue instead of being propagated.

    :param redmine_instance: Path to a pickle file holding the Redmine object used to update issues.
    :param issue: Path to a pickle file holding the Redmine issue being processed.
    :param work_dir: Directory in which the FASTA files, results and zip archive are created.
    :param description: Path to a pickle file holding the issue description (should just be a list of SEQIDs).
    """
    # Unpickle Redmine objects. Context managers make sure the pickle files are closed again.
    # NOTE(review): pickle can execute arbitrary code on load - these files must come from a trusted source.
    with open(redmine_instance, 'rb') as handle:
        redmine_instance = pickle.load(handle)
    with open(issue, 'rb') as handle:
        issue = pickle.load(handle)
    with open(description, 'rb') as handle:
        description = pickle.load(handle)

    try:
        fasta_dir = os.path.join(work_dir, 'fastas')
        # Description should just be a list of SEQIDs. Get the fasta files associated with them extracted
        # to the bio_request dir. Since we're docker-ing we need to copy; files get cleaned up at the end.
        retrieve_nas_files(seqids=description,
                           outdir=fasta_dir,
                           filetype='fasta',
                           copyflag=True)

        # Now we need to run mob_recon (and typing!) on each of the fasta files requested. Put all results into
        # one folder (this will need to be uploaded to FTP - will overwhelm max (10MB) file size limit on Redmine)
        fasta_files = glob.glob(os.path.join(fasta_dir, '*.fasta'))
        # Verify that specified fasta files are actually there, warn user if they aren't.
        missing_fastas = verify_fasta_files_present(seqid_list=description,
                                                    fasta_dir=fasta_dir)
        if len(missing_fastas) > 0:
            redmine_instance.issue.update(
                resource_id=issue.id,
                notes='WARNING: Could not find the following requested SEQIDs on'
                ' the OLC NAS: {}'.format(missing_fastas))

        # Make output dir
        output_dir = os.path.join(work_dir, 'mob_suite_results')
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        for fasta in fasta_files:
            # Everything before the first '.' in the file name is used as the per-sample output folder name.
            seqid = os.path.split(fasta)[-1].split('.')[0]
            # Run mobsuite via docker, since I can't seem to make it work with slurm any other way.
            cmd = 'docker run --rm -i -u $(id -u) -v /mnt/nas2:/mnt/nas2 mob_suite:latest /bin/bash -c "source activate ' \
                  '/mnt/nas2/virtual_environments/mob_suite && mob_recon -i {input_fasta} -o {output_dir} ' \
                  '--run_typer"'.format(input_fasta=fasta,
                                        output_dir=os.path.join(output_dir, seqid))
            os.system(cmd)

        # With mobsuite done, zip up the results folder and upload to the FTP.
        zip_path = os.path.join(work_dir, str(issue.id) + '.zip')
        shutil.make_archive(root_dir=output_dir,
                            format='zip',
                            base_name=os.path.join(work_dir, str(issue.id)))

        upload_successful = upload_to_ftp(local_file=zip_path)

        # And finally, do some file cleanup. This is best-effort: each step is attempted independently so that
        # one failure does not leave the remaining files behind.
        shutil.rmtree(output_dir, ignore_errors=True)
        shutil.rmtree(fasta_dir, ignore_errors=True)
        try:
            os.remove(zip_path)
        except OSError:
            pass

        if upload_successful:
            redmine_instance.issue.update(
                resource_id=issue.id,
                status_id=4,
                notes='Mob-suite process complete!\n\n'
                'Results are available at the following FTP address:\n'
                'ftp://ftp.agr.gc.ca/outgoing/cfia-ak/{}'.format(
                    str(issue.id) + '.zip'))
        else:
            redmine_instance.issue.update(
                resource_id=issue.id,
                status_id=4,
                notes=
                'Upload of result files was unsuccessful due to FTP connectivity issues. '
                'Please try again later.')

    except Exception as e:
        # Top-level boundary: report whatever went wrong on the issue rather than crashing the job.
        redmine_instance.issue.update(
            resource_id=issue.id,
            notes=
            'Something went wrong! Send this error traceback to your friendly '
            'neighborhood bioinformatician: {}'.format(e))
コード例 #2
0
def cowsnphr_redmine(redmine_instance, issue, work_dir, description):
    """
    Annotate a reference with Prokka, run COWSNPhR on the query sequences, and report on the Redmine issue.

    The description is split into reference entries (everything before a line containing COMPARE, ignoring
    lines containing REFERENCE) and query SEQIDs (everything after it). Exactly one reference must be given:
    either a SEQID retrieved from the NAS or the word ATTACHED, in which case the issue attachment is used.
    Results are zipped, uploaded to the FTP, and the issue is updated with the outcome. Any exception raised
    during processing is reported on the issue instead of being propagated.

    :param redmine_instance: Path to a pickle file holding the Redmine object used to update issues.
    :param issue: Path to a pickle file holding the Redmine issue being processed.
    :param work_dir: Directory in which reference, FASTQ, script and output files are created.
    :param description: Path to a pickle file holding the issue description as a list of lines.
    """
    # Unpickle Redmine objects. This is done before the try block: if it were to fail inside it, the error
    # handler would try to call .issue.update on a path string and hide the real problem.
    # NOTE(review): pickle can execute arbitrary code on load - these files must come from a trusted source.
    with open(redmine_instance, 'rb') as handle:
        redmine_instance = pickle.load(handle)
    with open(issue, 'rb') as handle:
        issue = pickle.load(handle)
    with open(description, 'rb') as handle:
        description = pickle.load(handle)

    try:
        query_list = list()
        reference = list()
        compare = False
        # Go through description to figure out what our query is and what the reference is.
        for item in description:
            item = item.upper()
            if item == '':
                continue
            if 'COMPARE' in item:
                # Everything after this marker is a query SEQID.
                compare = True
                continue
            if compare:
                query_list.append(item)
            elif 'REFERENCE' not in item:
                reference.append(item)

        # Create the folder that will hold the reference file
        reference_folder = os.path.join(work_dir, 'ref')
        os.makedirs(reference_folder)
        # Retrieve our reference file. Error user if they selected anything but one reference and don't continue.
        if len(reference) != 1:
            redmine_instance.issue.update(
                resource_id=issue.id,
                notes='ERROR: You must specify one reference strain, and you '
                'specified {} reference strains. Please create a new'
                ' issue and try again.'.format(len(reference)),
                status_id=4)
            return

        reference_pattern = os.path.join(reference_folder, '*fasta')
        # Entries were upper-cased above, so a direct comparison is enough here.
        if reference[0] != 'ATTACHED':
            # Extract our reference file to our working directory.
            retrieve_nas_files(seqids=reference,
                               outdir=reference_folder,
                               filetype='fasta',
                               copyflag=True)
            # Check that the file was successfully extracted. If it wasn't boot the user.
            if len(glob.glob(reference_pattern)) == 0:
                redmine_instance.issue.update(
                    resource_id=issue.id,
                    notes='ERROR: Could not find the specified reference file.'
                    ' Please verify it is a correct SEQID, create a new '
                    'issue, and try again.',
                    status_id=4)
                return

        # If user specified attachment as the reference file, download it to our working directory.
        else:
            # Get the attachment ID, and download if it isn't equal to zero (meaning no attachment, so boot user
            # with appropriate error message). If there are several attachments, the last one listed is used.
            attachment = redmine_instance.issue.get(issue.id,
                                                    include='attachments')
            attachment_id = 0
            ref_name = 'reference.fasta'
            for item in attachment.attachments:
                attachment_id = item.id
                ref_name = item.filename
            # Download if we found an attachment, and use as our reference. Otherwise, exit and tell user to try again
            if attachment_id != 0:
                attachment = redmine_instance.attachment.get(attachment_id)
                attachment.download(savepath=reference_folder,
                                    filename=ref_name)
            else:
                redmine_instance.issue.update(
                    resource_id=issue.id,
                    notes=
                    'ERROR: You specified that the reference would be in attached file,'
                    ' but no attached file was found. Please create a new issue and '
                    'try again.',
                    status_id=4)
                return
            # The rest of the workflow locates the reference with a '*fasta' glob. Tell the user explicitly if
            # the attachment cannot be found that way instead of failing later with an index error.
            if len(glob.glob(reference_pattern)) == 0:
                redmine_instance.issue.update(
                    resource_id=issue.id,
                    notes='ERROR: The attached reference file ({}) could not be used, as its name'
                    ' does not end in "fasta". Please rename the file, create a new issue, and '
                    'try again.'.format(ref_name),
                    status_id=4)
                return

        # PROKKA
        # These unfortunate hard coded paths appear to be necessary
        activate = 'source /home/ubuntu/miniconda3/bin/activate /mnt/nas2/virtual_environments/prokka'
        prokka = '/mnt/nas2/virtual_environments/prokka/bin/prokka'

        # Prepare command. Prokka writes into the reference folder (hence --force), using the reference file
        # name without its extension as the prefix.
        ref_file = glob.glob(reference_pattern)[0]
        prefix = os.path.splitext(os.path.basename(ref_file))[0]
        cmd = '{prokka} --force --outdir {output_folder} --prefix {prefix} {ref_file}'\
            .format(prokka=prokka,
                    output_folder=reference_folder,
                    prefix=prefix,
                    ref_file=ref_file)
        # Update the issue with the Prokka command
        redmine_instance.issue.update(
            resource_id=issue.id,
            notes='Prokka command:\n {cmd}'.format(cmd=cmd))
        # Create a shell script that activates the prokka conda environment and then runs the command
        template = "#!/bin/bash\n{} && {}".format(activate, cmd)
        prokka_script = os.path.join(work_dir, 'run_prokka.sh')
        with open(prokka_script, 'w+') as script_handle:
            script_handle.write(template)
        make_executable(prokka_script)

        # Run shell script
        os.system(prokka_script)

        seq_folder = os.path.join(work_dir, 'fastqs')
        # Now extract our query files.
        retrieve_nas_files(seqids=query_list,
                           outdir=seq_folder,
                           filetype='fastq',
                           copyflag=False)

        # With our query files extracted, verify that all the SEQIDs the user specified were able to be found.
        missing_fastqs = verify_fastqs_present(query_list, seq_folder)
        if len(missing_fastqs) > 0:
            redmine_instance.issue.update(
                resource_id=issue.id,
                notes=
                'Warning! Could not find the following requested query SEQIDs: '
                '{}. \nYou may want to verify the SEQIDs, create a new issue, and try'
                ' again.'.format(str(missing_fastqs)))

        # Now check that the FASTQ files aren't too far away from the specified reference file.
        bad_fastqs = check_distances(ref_fasta=glob.glob(reference_pattern)[0],
                                     fastq_folder=seq_folder,
                                     work_dir=work_dir)
        if len(bad_fastqs) > 0:
            redmine_instance.issue.update(
                resource_id=issue.id,
                notes='Warning! The following SEQIDs were found to be fairly'
                ' divergent from the reference file specified:{} \nYou may'
                ' want to start a new COWSNPhR issue without them and try '
                'again.'.format(str(bad_fastqs)))

        # COWSNPhR
        # These unfortunate hard coded paths appear to be necessary
        activate = 'source /home/ubuntu/miniconda3/bin/activate /mnt/nas2/virtual_environments/vsnp_dev'
        binary = '/mnt/nas2/virtual_environments/vSNP/cowsnphr/cowsnphr.py'

        # Prepare command
        cmd = '{bin} -s {seq_folder} -r {ref_folder} -w /mnt/nas2' \
            .format(bin=binary,
                    seq_folder=seq_folder,
                    ref_folder=reference_folder)
        # Update the issue with the COWSNPhR command
        redmine_instance.issue.update(
            resource_id=issue.id,
            notes='COWSNPhR command:\n {cmd}'.format(cmd=cmd))
        # Create a shell script that activates the vsnp_dev conda environment and then runs the command
        template = "#!/bin/bash\n{} && {}".format(activate, cmd)
        cowsnphr_script = os.path.join(work_dir, 'run_cowsnphr.sh')
        with open(cowsnphr_script, 'w+') as script_handle:
            script_handle.write(template)
        make_executable(cowsnphr_script)

        # Run shell script
        os.system(cowsnphr_script)

        # Zip output. The '.zip' suffix is appended to whatever path zip_folder returns.
        output_filename = 'cowsnphr_output_{}'.format(issue.id)
        zip_filepath = zip_folder(output_dir=work_dir,
                                  output_filename=output_filename)
        zip_filepath += '.zip'

        upload_successful = upload_to_ftp(local_file=zip_filepath)
        # Tell the user where the results are, or that the upload failed
        if upload_successful:
            redmine_instance.issue.update(
                resource_id=issue.id,
                status_id=4,
                notes='COWSNPhr process complete!\n\n'
                'Results are available at the following FTP address:\n'
                'ftp://ftp.agr.gc.ca/outgoing/cfia-ak/{}'.format(
                    os.path.split(zip_filepath)[1]))
        else:
            redmine_instance.issue.update(
                resource_id=issue.id,
                status_id=4,
                notes=
                'Upload of result files was unsuccessful due to FTP connectivity '
                'issues. Please try again later.')
        # Clean up files
        shutil.rmtree(reference_folder)
        shutil.rmtree(seq_folder)
    except Exception as e:
        # Top-level boundary: report whatever went wrong on the issue rather than crashing the job.
        redmine_instance.issue.update(
            resource_id=issue.id,
            notes=
            'Something went wrong! Send this error traceback to your friendly '
            'neighborhood bioinformatician: {}'.format(e))
コード例 #3
0
def psortb_redmine(redmine_instance, issue, work_dir, description):
    """
    Run PsortB on the proteins of the requested assemblies and report the outcome on the Redmine issue.

    For each requested SEQID the assembly is retrieved, annotated with Prokka to obtain a protein file,
    classified as gram negative/positive by BLASTing Omp85 proteins against those proteins, and then run
    through PsortB with the matching flag. Reports and protein files are zipped and uploaded to the FTP.
    Any exception raised during processing is reported on the issue instead of being propagated.

    :param redmine_instance: Path to a pickle file holding the Redmine object used to update issues.
    :param issue: Path to a pickle file holding the Redmine issue being processed.
    :param work_dir: Directory in which assemblies, Prokka output and reports are created.
    :param description: Path to a pickle file holding the issue description as a list of SEQIDs.
    """
    # Unpickle Redmine objects. This is done before the try block: if it were to fail inside it, the error
    # handler would try to call .issue.update on a path string and hide the real problem.
    # NOTE(review): pickle can execute arbitrary code on load - these files must come from a trusted source.
    with open(redmine_instance, 'rb') as handle:
        redmine_instance = pickle.load(handle)
    with open(issue, 'rb') as handle:
        issue = pickle.load(handle)
    with open(description, 'rb') as handle:
        description = pickle.load(handle)

    try:
        # Parse description to get list of SeqIDs
        seqids = [item.rstrip().upper() for item in description]

        assemblies_folder = os.path.join(work_dir, 'assemblies')
        retrieve_nas_files(seqids=seqids,
                           outdir=assemblies_folder,
                           filetype='fasta',
                           copyflag=False)
        missing_fastas = check_fastas_present(seqids, assemblies_folder)
        if len(missing_fastas) > 0:
            redmine_instance.issue.update(resource_id=issue.id,
                                          notes='WARNING: Could not find the following requested FASTA SEQIDs on'
                                                ' the OLC NAS: {}'.format(missing_fastas))
        # Only keep going with the SEQIDs that we actually have an assembly for.
        for fasta in missing_fastas:
            seqids.remove(fasta)
        # Steps to follow here:
        # 1) Generate protein file for sequence(s) of interest - run prokka so proteins get named nicely.

        prokka_folder = os.path.join(work_dir, 'prokka')
        os.makedirs(prokka_folder)
        # These unfortunate hard coded paths appear to be necessary
        activate = 'source /home/ubuntu/miniconda3/bin/activate /mnt/nas2/virtual_environments/prokka'
        prokka = '/mnt/nas2/virtual_environments/prokka/bin/prokka'

        for assembly in glob.glob(os.path.join(assemblies_folder, '*.fasta')):
            # Everything before the first '.' in the file name is used as the SEQID.
            seqid = os.path.split(assembly)[1].split('.')[0]
            # Prepare command
            cmd = '{prokka} --outdir {output_folder} --prefix {seqid} {assembly}'.format(prokka=prokka,
                                                                                         output_folder=os.path.join(prokka_folder, seqid),
                                                                                         seqid=seqid,
                                                                                         assembly=assembly)

            # Create a shell script that activates the prokka conda environment and then runs the command.
            # The same script file is overwritten for every assembly.
            template = "#!/bin/bash\n{} && {}".format(activate, cmd)
            prokka_script = os.path.join(work_dir, 'run_prokka.sh')
            with open(prokka_script, 'w+') as script_handle:
                script_handle.write(template)
            make_executable(prokka_script)

            # Run shell script
            os.system(prokka_script)

        # 2) Figure out for each sequence if gram positive or negative
        # Psort says using Omp85 works pretty well for determining. Use Omp85 proteins from Neisseria,
        # Thermosipho, Synechoccus, and Thermus. Any hit with an e-value below 0.0001 marks the sequence as
        # gram negative, otherwise it is treated as gram positive. There are exceptions to this, but I don't
        # think any of them are organisms that we work with/care about.
        # NOTE(review): an earlier version of this comment gave the cutoff as 10^-3 and had the
        # positive/negative outcome the other way round - confirm the threshold actually intended.
        gram_pos_neg_dict = dict()
        omp85_proteins = '/mnt/nas2/redmine/applications/OLCRedmineAutomator/data_and_stuff/omp85_proteins.fasta'
        protein_files = glob.glob(os.path.join(prokka_folder, '*', '*.faa'))
        for protein_file in protein_files:
            seqid = os.path.split(protein_file)[1].replace('.faa', '')
            # Make a blast DB from proteins.
            cmd = 'makeblastdb -in {} -dbtype prot'.format(protein_file)
            os.system(cmd)
            # Now BLAST our OMP85 proteins against the genome proteins.
            blast_result_file = protein_file.replace('.faa', '_blast.tsv')
            cmd = 'blastp -db {} -query {} -out {} -outfmt "6 qseqid sseqid evalue"'.format(protein_file,
                                                                                            omp85_proteins,
                                                                                            blast_result_file)
            os.system(cmd)
            # Now parse through blast report to find if gram positive or negative. The e-value is the last
            # column of each line.
            has_omp_85 = False
            with open(blast_result_file) as f:
                for line in f:
                    evalue = float(line.rstrip().split()[-1])
                    if evalue < 0.0001:
                        has_omp_85 = True

            gram_pos_neg_dict[seqid] = 'Negative' if has_omp_85 else 'Positive'

        # IMPORTANT NOTES ON GETTING PSORTB TO RUN:
        # You'll need to
        # 1) have pulled a docker image of PSORTB to each of the
        # nodes (docker pull brinkmanlab/psortb_commandline:1.0.2 should do the trick)
        # 2) Put a psortb executable into the data_and_stuff folder:
        # wget -O data_and_stuff/psortb https://raw.githubusercontent.com/brinkmanlab/psortb_commandline_docker/master/psortb
        # 3) chmod the psortb executable to actually make it executable.
        # 4) remove the 'sudo' from lines 35 and 61 of the psortb executable, otherwise nodes get unhappy, and
        # make the -it in the commands into just a -i

        # 3) Run PsortB!
        psortb_executable = '/mnt/nas2/redmine/applications/OLCRedmineAutomator/data_and_stuff/psortb'
        for seqid in seqids:
            protein_file = os.path.join(prokka_folder, seqid, seqid + '.faa')
            output_dir = os.path.join(prokka_folder, seqid)
            cmd = '{} -i {} -r {} '.format(psortb_executable, protein_file, output_dir)
            if gram_pos_neg_dict[seqid] == 'Negative':
                cmd += '--negative'
            else:
                cmd += '--positive'
            os.system(cmd)

        # Now need to: upload results, do file cleanup.
        report_dir = os.path.join(work_dir, 'psortb_reports_{}'.format(issue.id))
        os.makedirs(report_dir)
        raw_reports = glob.glob(os.path.join(prokka_folder, '*', '*psortb*.txt'))
        for raw_report in raw_reports:
            # Prefix each report with the name of the folder it came from (the SEQID) so names don't clash.
            new_name = raw_report.split('/')[-2] + '_' + os.path.split(raw_report)[1]
            cmd = 'cp {} {}'.format(raw_report, os.path.join(report_dir, new_name))
            os.system(cmd)
        # Also hand back the protein files; the wildcards are expanded by the shell.
        cmd = 'cp {} {}'.format(os.path.join(prokka_folder, '*', '*.faa'), report_dir)
        os.system(cmd)

        shutil.make_archive(report_dir, 'zip', report_dir)
        upload_successful = upload_to_ftp(local_file=report_dir + '.zip')
        # Only advertise the FTP link if the upload actually worked.
        if upload_successful:
            redmine_instance.issue.update(resource_id=issue.id, status_id=4,
                                          notes='PsortB complete! Results available at: '
                                                'ftp://ftp.agr.gc.ca/outgoing/cfia-ak/{}'.format(os.path.split(report_dir)[1] + '.zip'))
        else:
            redmine_instance.issue.update(resource_id=issue.id, status_id=4,
                                          notes='Upload of result files was unsuccessful due to FTP connectivity '
                                                'issues. Please try again later.')
        shutil.rmtree(assemblies_folder)
        shutil.rmtree(prokka_folder)
    except Exception as e:
        # Top-level boundary: report whatever went wrong on the issue rather than crashing the job.
        redmine_instance.issue.update(resource_id=issue.id,
                                      notes='Something went wrong! Send this error traceback to your friendly '
                                            'neighborhood bioinformatician: {}'.format(e))
コード例 #4
0
def wgsassembly_redmine(redmine_instance, issue, work_dir, description):
    """
    Download (or locate) a sequencing run, assemble it with the COWBAT docker image, and report on the issue.

    The first line of the description names the sequence folder. If it contains a '/', it is treated as a
    path to files already on the NAS and no validation is done; otherwise the folder is validated and
    downloaded from the FTP. The run is copied to /hdfs, assembled, moved to the processed sequence data
    folder, and the reports (plus best assemblies) are zipped and uploaded to the FTP. Any exception raised
    during processing is reported on the issue and its traceback printed, instead of being propagated.

    :param redmine_instance: Path to a pickle file holding the Redmine object used to update issues.
    :param issue: Path to a pickle file holding the Redmine issue being processed.
    :param work_dir: Directory in which zip archives and the upload folder are created.
    :param description: Path to a pickle file holding the issue description as a list of lines.
    """
    # Unpickle Redmine objects. Context managers make sure the pickle files are closed again.
    # NOTE(review): pickle can execute arbitrary code on load - these files must come from a trusted source.
    with open(redmine_instance, 'rb') as handle:
        redmine_instance = pickle.load(handle)
    with open(issue, 'rb') as handle:
        issue = pickle.load(handle)
    with open(description, 'rb') as handle:
        description = pickle.load(handle)

    try:
        # Add Cathy as a watcher so that we can make sure things get done. Also add me (Andrew) in case people
        # forget to assign the issue to me.
        issue.watcher.add(225)  # This is Cathy
        issue.watcher.add(296)  # This is me.
        # The first line of the description is the sequence folder.
        sequence_folder = description[0]

        # If the sequence folder looks like an absolute path on our NAS (implement a tougher check here),
        # we assume the user knows what they're doing and don't bother validating anything
        if '/' in sequence_folder:
            redmine_instance.issue.update(resource_id=issue.id, status_id=2,
                                          notes='Attempting to use files already on NAS. Only use this option if you'
                                                ' really know what you\'re doing!')
            # os.path.split returns '' as its last element if a trailing slash is present, not the final folder
            # in the path, so a trailing slash gets removed first.
            if sequence_folder.endswith('/'):
                sequence_folder = sequence_folder[:-1]
            local_folder = sequence_folder
            sequence_folder = os.path.split(local_folder)[1]

        # Otherwise, do all verification checks on the FTP and download files.
        else:
            validation = verify_all_the_things(sequence_folder=sequence_folder,
                                               issue=issue,
                                               work_dir=work_dir,
                                               redmine_instance=redmine_instance)

            # All checks that needed to be done should now be done. If any of them returned something bad,
            # we stop and boot the user. Otherwise, go ahead with downloading files.
            if validation is False:
                return

            download_info_sheets(sequence_folder, work_dir)
            redmine_instance.issue.update(resource_id=issue.id, status_id=2,
                                          notes='All validation checks passed - beginning download '
                                                'and assembly of sequence files.')

            # Create the local folder that we'll need.
            local_folder = os.path.join('/mnt/nas2/raw_sequence_data/miseq', sequence_folder)

            if not os.path.isdir(local_folder):
                os.makedirs(local_folder)

            # Download the folder, recursively!
            download_successful = download_dir(sequence_folder, local_folder)

            if download_successful is False:
                redmine_instance.issue.update(resource_id=issue.id,
                                              assigned_to_id=296,
                                              subject='WGS Assembly: {}'.format(description[0]),
                                              notes='Download of files from FTP was not successful.')
                return

        # Once the folder has been downloaded, copy it to the hdfs and start assembling using docker image.
        cmd = 'cp -r {local_folder} /hdfs'.format(local_folder=local_folder)
        os.system(cmd)
        # Run the new pipeline docker image, after making sure a container named cowbat doesn't already exist.
        cmd = 'docker rm -f cowbat'
        os.system(cmd)
        hdfs_folder = os.path.join('/hdfs', sequence_folder)
        cmd = 'docker run -i -u $(id -u) -v /mnt/nas2:/mnt/nas2 -v /hdfs:/hdfs --name cowbat --rm {cowbat_image} /bin/bash -c ' \
              '"source activate cowbat && assembly_pipeline.py -s {hdfs_folder} ' \
              '-r {cowbat_databases}"'.format(hdfs_folder=hdfs_folder,
                                              cowbat_image=COWBAT_IMAGE,
                                              cowbat_databases=COWBAT_DATABASES)
        os.system(cmd)
        # Now need to move to an appropriate processed_sequence_data folder.
        local_wgs_spades_folder = os.path.join('/mnt/nas2/processed_sequence_data/miseq_assemblies', sequence_folder)
        cmd = 'mv {hdfs_folder} {wgsspades_folder}'.format(hdfs_folder=hdfs_folder,
                                                           wgsspades_folder=local_wgs_spades_folder)
        print(cmd)
        os.system(cmd)

        # Remove the raw sequence files from processed_sequence_data, since we already have them in raw.
        cmd = 'rm {fastq_files}'.format(fastq_files=os.path.join(local_wgs_spades_folder, '*.fastq.gz'))
        print(cmd)
        os.system(cmd)

        # Put the sample sheet in with the reports and zip the reports folder up in the work dir.
        reports_folder = os.path.join(local_wgs_spades_folder, 'reports')
        cmd = 'cp {samplesheet} {reports_folder}'.format(samplesheet=os.path.join(local_wgs_spades_folder, 'SampleSheet.csv'),
                                                         reports_folder=reports_folder)
        os.system(cmd)
        shutil.make_archive(os.path.join(work_dir, sequence_folder), 'zip', reports_folder)
        # NOTE(review): output_list describes the reports zip but is not used anywhere else in this function -
        # confirm whether it was meant to be attached to the issue.
        output_list = list()
        output_dict = dict()
        output_dict['path'] = os.path.join(work_dir, sequence_folder + '.zip')
        output_dict['filename'] = sequence_folder + '.zip'
        output_list.append(output_dict)

        # Apparently we're also supposed to be uploading assemblies - these will be too big to be Redmine attachments,
        # so we'll need to upload to the ftp.
        folder_to_upload = os.path.join(work_dir, 'reports_and_assemblies')
        os.makedirs(folder_to_upload)
        cmd = 'cp -r {best_assemblies} {upload_folder}'.format(best_assemblies=os.path.join(local_wgs_spades_folder, 'BestAssemblies'),
                                                               upload_folder=folder_to_upload)
        os.system(cmd)
        cmd = 'cp -r {reports} {upload_folder}'.format(reports=reports_folder,
                                                       upload_folder=folder_to_upload)
        os.system(cmd)
        shutil.make_archive(os.path.join(work_dir, str(issue.id)), 'zip', folder_to_upload)

        # At this point, zip folder has been created (hopefully) called issue_id.zip in biorequest dir. Upload that
        # to the FTP.
        upload_successful = upload_to_ftp(local_file=os.path.join(work_dir, str(issue.id) + '.zip'))

        # Flag on the issue that the run has finished and needs to be added to the DB so things don't get
        # missed. The issue is assigned to ID 296 with priority 3 (High).
        # NOTE(review): an earlier comment said the assigned_to_id to use is 226 - confirm which is intended.
        if upload_successful:
            redmine_instance.issue.update(resource_id=issue.id,
                                          assigned_to_id=296, priority_id=3,
                                          subject='WGS Assembly: {}'.format(description[0]), # Add run name to subject
                                          notes='This run has finished assembly! Please add it to the OLC Database.\n'
                                                'Reports and assemblies uploaded to FTP at: ftp://ftp.agr.gc.ca/outgoing/'
                                                'cfia-ak/{}.zip'.format(issue.id))
        else:
            redmine_instance.issue.update(resource_id=issue.id,
                                          assigned_to_id=296,
                                          subject='WGS Assembly: {}'.format(description[0]),
                                          notes='Upload of result files was not successful. Upload them manually!')
        try:
            delete_ftp_dir(description[0])
        except Exception:  # Hakuna matata if things don't get deleted. This is just a nice-to-have
            pass

    except Exception as e:
        # Print the traceback first so it is not lost should the issue update below fail as well.
        traceback.print_exc()
        redmine_instance.issue.update(resource_id=issue.id,
                                      notes='Something went wrong! Send this error traceback to your friendly '
                                            'neighborhood bioinformatician: {}'.format(e))
コード例 #5
0
def reportretrieve_redmine(redmine_instance, issue, work_dir, description):
    """
    Collect the reports folders containing the requested SEQIDs, zip them, and upload the archive to the FTP.

    Every combinedMetadata.csv under the miseq_assemblies folder is scanned; the reports folder of each sheet
    that lists a requested SEQID is copied into ``work_dir/<issue id>`` under a name built from its run folder.
    The user is warned about SEQIDs no sheet contains, and the issue is updated with either the download link
    or an upload-failure message. Any exception raised during processing is reported on the issue instead of
    being propagated.

    :param redmine_instance: Path to a pickle file holding the Redmine object used to update issues.
    :param issue: Path to a pickle file holding the Redmine issue being processed.
    :param work_dir: Directory in which the collected reports and the zip archive are created.
    :param description: Path to a pickle file holding the issue description as a list of SEQIDs.
    """
    print('External retrieving!')
    # Unpickle Redmine objects. Context managers make sure the pickle files are closed again.
    # NOTE(review): pickle can execute arbitrary code on load - these files must come from a trusted source.
    with open(redmine_instance, 'rb') as handle:
        redmine_instance = pickle.load(handle)
    with open(issue, 'rb') as handle:
        issue = pickle.load(handle)
    with open(description, 'rb') as handle:
        description = pickle.load(handle)

    try:
        collection_dir = os.path.join(work_dir, str(issue.id))
        os.makedirs(collection_dir)
        # Parse description to figure out what SEQIDs we need to run on.
        seqid_list = [item.upper() for item in description if item.upper() != '']

        report_path_list = list()
        # Go through CombinedMetadata sheets to find which folders we need to copy to FTP.
        metadata_sheets = glob.glob(
            '/mnt/nas2/processed_sequence_data/miseq_assemblies/*/reports/combinedMetadata.csv'
        )

        for metadata_sheet in metadata_sheets:
            with open(metadata_sheet) as csvfile:
                lines = csvfile.readlines()
            # Skip the header row. First entry in each remaining row should be the SEQID.
            for line in lines[1:]:
                seqid = line.split(',')[0]
                if seqid in seqid_list:
                    report_path = os.path.abspath(metadata_sheet)
                    if report_path not in report_path_list:
                        report_path_list.append(report_path)
                    # Found SEQIDs are removed, so whatever is left at the end could not be found.
                    seqid_list.remove(seqid)

        # Warn the user if reports couldn't be found for some SEQIDs.
        if len(seqid_list) > 0:
            redmine_instance.issue.update(
                resource_id=issue.id,
                notes=
                'WARNING: Could not find reports for the following SEQIDs: '
                '{}'.format(seqid_list))

        # Go through the report path list and copy reports folders, while renaming them.
        for report in report_path_list:
            report_folder = os.path.dirname(os.path.abspath(report))
            # New name is <run folder>_<reports folder>, taken from the last two path components.
            path_parts = report_folder.split('/')
            new_folder_name = path_parts[-2] + '_' + path_parts[-1]
            if os.path.isdir(os.path.join(collection_dir, new_folder_name)):
                # Very slim possiblity two folders could have the same name. This takes care of that.
                # No way there should ever be more than two.
                new_folder_name = new_folder_name + '_2'
            cmd = 'cp -r {report_folder} {new_folder}'.format(
                report_folder=report_folder,
                new_folder=os.path.join(collection_dir, new_folder_name))
            os.system(cmd)

        # Now make a zip folder that we'll upload to the FTP.
        zip_path = os.path.join(work_dir, str(issue.id) + '.zip')
        shutil.make_archive(root_dir=collection_dir,
                            format='zip',
                            base_name=os.path.join(work_dir, str(issue.id)))

        upload_successful = upload_to_ftp(local_file=zip_path)

        # And finally, do some file cleanup. This is best-effort: each step is attempted independently so that
        # a failure to remove the folder does not leave the zip file behind as well.
        shutil.rmtree(collection_dir, ignore_errors=True)
        try:
            os.remove(zip_path)
        except OSError:
            pass

        if upload_successful:
            redmine_instance.issue.update(
                resource_id=issue.id,
                status_id=4,
                notes='Report Retrieve process complete!\n\n'
                'Results are available at the following FTP address:\n'
                'ftp://ftp.agr.gc.ca/outgoing/cfia-ak/{}'.format(
                    str(issue.id) + '.zip'))
        else:
            redmine_instance.issue.update(
                resource_id=issue.id,
                status_id=4,
                notes=
                'Upload of result files was unsuccessful due to FTP connectivity issues. '
                'Please try again later.')

    except Exception as e:
        # Top-level boundary: report whatever went wrong on the issue rather than crashing the job.
        redmine_instance.issue.update(
            resource_id=issue.id,
            notes=
            'Something went wrong! Send this error traceback to your friendly '
            'neighborhood bioinformatician: {}'.format(e))
コード例 #6
0
def prokka_redmine(redmine_instance, issue, work_dir, description):
    """Run Prokka on the requested assemblies and report the outcome on the Redmine issue.

    The FASTA assembly for each requested SEQID is retrieved into ``work_dir/assemblies``,
    Prokka is run once per retrieved assembly (results land in ``work_dir/output/<seqid>``),
    the output folder is zipped and uploaded via ``upload_to_ftp``, and the issue is updated
    with either the download address or a note that the upload failed.

    :param redmine_instance: Path to a pickled Redmine API object.
    :param issue: Path to a pickled Redmine issue object (its ``id`` attribute is used).
    :param work_dir: Directory in which the ``assemblies`` and ``output`` folders, the helper
        shell script and the zip archive are created.
    :param description: Path to a pickled sequence of SEQID strings.
    :raises Exception: Anything raised while unpickling the three inputs propagates to the
        caller. Errors raised after that point are caught and posted as a note on the issue.
    """
    # Unpickle Redmine objects. This happens before the try block so that the error handler
    # below always has real Redmine objects to work with; if unpickling failed inside the
    # try, the handler would call .issue.update on a path string and mask the actual error.
    # NOTE(review): pickle.load can execute arbitrary code - these files must come from a
    # trusted source.
    with open(redmine_instance, 'rb') as pickled_file:
        redmine_instance = pickle.load(pickled_file)
    with open(issue, 'rb') as pickled_file:
        issue = pickle.load(pickled_file)
    with open(description, 'rb') as pickled_file:
        description = pickle.load(pickled_file)

    try:
        # Parse description to get list of SeqIDs. IDs are only upper-cased here; no format
        # validation is performed. Missing IDs are reported after retrieval instead.
        seqids = [item.upper() for item in description]

        # Create folder to drop FASTA assemblies
        assemblies_folder = os.path.join(work_dir, 'assemblies')
        os.mkdir(assemblies_folder)

        # Create output folder
        output_folder = os.path.join(work_dir, 'output')
        os.makedirs(output_folder)

        # Retrieve FASTA files, then warn the user about any requested SEQID with no file.
        retrieve_nas_files(seqids=seqids,
                           outdir=assemblies_folder,
                           filetype='fasta',
                           copyflag=False)
        missing_fastas = verify_fasta_files_present(seqids, assemblies_folder)
        if missing_fastas:
            redmine_instance.issue.update(
                resource_id=issue.id,
                notes=
                'WARNING: Could not find the following requested SEQIDs on '
                'the OLC NAS: {}'.format(missing_fastas))

        # These unfortunate hard coded paths appear to be necessary
        activate = 'source /home/ubuntu/miniconda3/bin/activate /mnt/nas2/virtual_environments/prokka'
        prokka = '/mnt/nas2/virtual_environments/prokka/bin/prokka'

        # The helper script path is the same for every assembly; it is rewritten each pass.
        prokka_script = os.path.join(work_dir, 'run_prokka.sh')

        for assembly in glob.glob(os.path.join(assemblies_folder, '*.fasta')):
            # SEQID is the file name up to the first dot
            seqid = os.path.split(assembly)[1].split('.')[0]
            # Prepare command; each sample gets its own sub-folder of the output folder
            cmd = '{prokka} --outdir {output_folder} --prefix {seqid} {assembly}'.format(
                prokka=prokka,
                output_folder=os.path.join(output_folder, seqid),
                seqid=seqid,
                assembly=assembly)

            # Write a shell script that activates the Prokka conda environment and then
            # runs the command inside it
            template = "#!/bin/bash\n{} && {}".format(activate, cmd)
            with open(prokka_script, 'w+') as script_file:
                script_file.write(template)
            make_executable(prokka_script)

            # Run shell script
            os.system(prokka_script)

        # Zip output. '.zip' is appended to the returned path to get the archive file name.
        output_filename = 'prokka_output_{}'.format(issue.id)
        zip_filepath = zip_folder(results_path=output_folder,
                                  output_dir=work_dir,
                                  output_filename=output_filename)
        zip_filepath += '.zip'

        # Upload the archive and tell the user where to find it (or that the upload failed)
        upload_successful = upload_to_ftp(local_file=zip_filepath)
        if upload_successful:
            redmine_instance.issue.update(
                resource_id=issue.id,
                status_id=4,
                notes='Prokka process complete!\n\n'
                'Results are available at the following FTP address:\n'
                'ftp://ftp.agr.gc.ca/outgoing/cfia-ak/{}'.format(
                    os.path.split(zip_filepath)[1]))
        else:
            redmine_instance.issue.update(
                resource_id=issue.id,
                status_id=4,
                notes=
                'Upload of result files was unsuccessful due to FTP connectivity issues. '
                'Please try again later.')
        # Clean up files
        shutil.rmtree(output_folder)
        os.remove(zip_filepath)
    except Exception as e:
        # Report any failure on the issue itself rather than letting it propagate
        redmine_instance.issue.update(
            resource_id=issue.id,
            notes=
            'Something went wrong! Send this error traceback to your friendly '
            'neighborhood bioinformatician: {}'.format(e))