Ejemplo n.º 1
0
def system_call(cmd):
    """Call cmd and return (stdout, stderr, return_value).

    cmd: can be either a string containing the command to be run, or a
     sequence of strings that are the tokens of the command.

    This function is ported from QIIME (http://www.qiime.org), previously
    named qiime_system_call. QIIME is a GPL project, but we obtained permission
    from the authors of this function to port it to pyqi (and keep it under
    pyqi's BSD license).
    """
    proc = Popen(cmd,
                 universal_newlines=True,
                 shell=True,
                 stdout=PIPE,
                 stderr=PIPE)
    # communicate pulls all stdout/stderr from the PIPEs to
    # avoid blocking -- don't remove this line!
    stdout, stderr = proc.communicate()
    return_value = proc.returncode

    if return_value != 0:
        raise ComputeError("Failed to execute: %s\nstdout: %s\nstderr: %s" %
                           (cmd, stdout, stderr))

    return stdout, stderr, return_value
Ejemplo n.º 2
0
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of the "
                         "user-defined terms in the ENA ontology")

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [
            path for _, path, ftype in preprocessed_data.get_filepaths()
            if ftype == 'preprocessed_demux'
        ][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str,
            study.title,
            study.info['study_abstract'],
            investigation_type,
            st,
            pt,
            fastq_dir_fp,
            new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp,
                                   submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success', study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
Ejemplo n.º 3
0
def submit_EBI(artifact_id, action, send, test=False):
    """Submit an artifact to EBI

    Parameters
    ----------
    artifact_id : int
        The artifact id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    test : bool
        If True some restrictions will be ignored, only used in parse_EBI_reply
    """
    # step 1: init and validate
    ebi_submission = EBISubmission(artifact_id, action)

    # step 2: generate demux fastq files
    try:
        ebi_submission.generate_demultiplexed_fastq()
    except Exception:
        error_msg = format_exc()
        if isdir(ebi_submission.full_ebi_dir):
            rmtree(ebi_submission.full_ebi_dir)
        LogEntry.create('Runtime',
                        error_msg,
                        info={'ebi_submission': artifact_id})
        raise

    # step 3: generate and write xml files
    ebi_submission.generate_xml_files()

    if send:
        # getting aspera's password
        old_ascp_pass = environ.get('ASPERA_SCP_PASS', '')
        if old_ascp_pass == '':
            environ['ASPERA_SCP_PASS'] = qiita_config.ebi_seq_xfer_pass
        ascp_passwd = environ['ASPERA_SCP_PASS']
        LogEntry.create('Runtime',
                        ('Submission of sequences of pre_processed_id: '
                         '%d completed successfully' % artifact_id))

        # step 4: sending sequences
        if action != 'MODIFY':
            LogEntry.create('Runtime',
                            ("Submitting sequences for pre_processed_id: "
                             "%d" % artifact_id))
            for cmd in ebi_submission.generate_send_sequences_cmd():
                stdout, stderr, rv = system_call(cmd)
                if rv != 0:
                    error_msg = ("ASCP Error:\nStd output:%s\nStd error:%s" %
                                 (stdout, stderr))
                    environ['ASPERA_SCP_PASS'] = old_ascp_pass
                    raise ComputeError(error_msg)
                open(ebi_submission.ascp_reply,
                     'a').write('stdout:\n%s\n\nstderr: %s' % (stdout, stderr))
        environ['ASPERA_SCP_PASS'] = old_ascp_pass

        # step 5: sending xml and parsing answer
        xmls_cmds = ebi_submission.generate_curl_command(
            ebi_seq_xfer_pass=ascp_passwd)
        LogEntry.create('Runtime', ("Submitting XMLs for pre_processed_id: "
                                    "%d" % artifact_id))
        xml_content, stderr, rv = system_call(xmls_cmds)
        if rv != 0:
            error_msg = ("Error:\nStd output:%s\nStd error:%s" %
                         (xml_content, stderr))
            raise ComputeError(error_msg)
        else:
            LogEntry.create('Runtime',
                            ('Submission of sequences of pre_processed_id: '
                             '%d completed successfully' % artifact_id))
        open(ebi_submission.curl_reply,
             'w').write('stdout:\n%s\n\nstderr: %s' % (xml_content, stderr))

        try:
            st_acc, sa_acc, bio_acc, ex_acc, run_acc = \
                ebi_submission.parse_EBI_reply(xml_content, test=test)
        except EBISubmissionError as e:
            error = str(e)
            le = LogEntry.create('Fatal',
                                 "Command: %s\nError: %s\n" %
                                 (xml_content, error),
                                 info={'ebi_submission': artifact_id})
            raise ComputeError("EBI Submission failed! Log id: %d\n%s" %
                               (le.id, error))

        if action == 'ADD' or test:
            if st_acc:
                ebi_submission.study.ebi_study_accession = st_acc
            if sa_acc:
                ebi_submission.sample_template.ebi_sample_accessions = sa_acc
            if bio_acc:
                ebi_submission.sample_template.biosample_accessions = bio_acc
            if ex_acc:
                ebi_submission.prep_template.ebi_experiment_accessions = ex_acc
            ebi_submission.artifact.ebi_run_accessions = run_acc
    else:
        st_acc, sa_acc, bio_acc, ex_acc, run_acc = None, None, None, None, None

    return st_acc, sa_acc, bio_acc, ex_acc, run_acc
Ejemplo n.º 4
0
def submit_VAMPS(artifact_id):
    """Submit artifact to VAMPS

    Parameters
    ----------
    artifact_id : int
        The artifact id

    Raises
    ------
    ComputeError
        - If the artifact cannot be submitted to VAMPS
        - If the artifact is associated with more than one prep template
    """
    artifact = Artifact(artifact_id)
    if not artifact.can_be_submitted_to_vamps:
        raise ComputeError("Artifact %d cannot be submitted to VAMPS" %
                           artifact_id)
    study = artifact.study
    sample_template = study.sample_template
    prep_templates = artifact.prep_templates
    if len(prep_templates) > 1:
        raise ComputeError(
            "Multiple prep templates associated with the artifact: %s" %
            artifact_id)
    prep_template = prep_templates[0]

    # Also need to check that is not submitting (see item in #1523)
    if artifact.is_submitted_to_vamps:
        raise ValueError("Cannot resubmit artifact %s to VAMPS!" % artifact_id)

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(targz_folder,
                    '%d_%d_%d.tgz' % (study.id, prep_template.id, artifact_id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in artifact.filepaths:
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')

    targz.close()

    # submitting
    cmd = ("curl -F user=%s -F pass='******' -F uploadFile=@%s -F "
           "press=UploadFile %s" %
           (qiita_config.vamps_user, qiita_config.vamps_pass, targz_fp,
            qiita_config.vamps_url))
    obs, stderr, rv = system_call(cmd)
    if rv != 0:
        error_msg = ("Error:\nStd output:%s\nStd error:%s" % (obs, stderr))
        raise ComputeError(error_msg)

    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    if obs != exp:
        return False
    else:
        artifact.is_submitted_to_vamps = True
        return True
Ejemplo n.º 5
0
def submit_EBI(artifact_id, action, send, test=False, test_size=False):
    """Submit an artifact to EBI

    Parameters
    ----------
    artifact_id : int
        The artifact id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    test : bool
        If True some restrictions will be ignored, only used in parse_EBI_reply
    test_size : bool
        If True the EBI-ENA restriction size will be changed to 6000
    """
    # step 1: init and validate
    ebi_submission = EBISubmission(artifact_id, action)

    # step 2: generate demux fastq files
    try:
        ebi_submission.generate_demultiplexed_fastq()
    except Exception:
        error_msg = format_exc()
        if isdir(ebi_submission.full_ebi_dir):
            rmtree(ebi_submission.full_ebi_dir)
        LogEntry.create('Runtime',
                        error_msg,
                        info={'ebi_submission': artifact_id})
        raise

    # step 3: generate and write xml files
    ebi_submission.generate_xml_files()

    # before we continue let's check the size of the submission
    to_review = [
        ebi_submission.study_xml_fp, ebi_submission.sample_xml_fp,
        ebi_submission.experiment_xml_fp, ebi_submission.run_xml_fp,
        ebi_submission.submission_xml_fp
    ]
    total_size = sum([stat(tr).st_size for tr in to_review if tr is not None])
    # note that the max for EBI is 10M but let's play it safe
    max_size = 10e+6 if not test_size else 5000
    if total_size > max_size:
        LogEntry.create(
            'Runtime', 'The submission: %d is larger than allowed (%d), will '
            'try to fix: %d' % (artifact_id, max_size, total_size))
        # transform current metadata to dataframe for easier curation
        rows = {k: dict(v) for k, v in ebi_submission.samples.items()}
        df = pd.DataFrame.from_dict(rows, orient='index')
        # remove unique columns and same value in all columns
        nunique = df.apply(pd.Series.nunique)
        nsamples = len(df.index)
        cols_to_drop = set(nunique[(nunique == 1) |
                                   (nunique == nsamples)].index)
        # maximize deletion by removing also columns that are almost all the
        # same or almost all unique
        cols_to_drop = set(nunique[(nunique <= int(nsamples * .01)) |
                                   (nunique >= int(nsamples * .5))].index)
        cols_to_drop = cols_to_drop - {
            'taxon_id', 'scientific_name', 'description'
        }
        all_samples = ebi_submission.sample_template.ebi_sample_accessions
        samples = [k for k in ebi_submission.samples if all_samples[k] is None]
        if samples:
            ebi_submission.write_xml_file(
                ebi_submission.generate_sample_xml(samples, cols_to_drop),
                ebi_submission.sample_xml_fp)

        # now let's recalculate the size to make sure it's fine
        new_total_size = sum(
            [stat(tr).st_size for tr in to_review if tr is not None])
        LogEntry.create(
            'Runtime', 'The submission: %d after cleaning is %d and was %d' %
            (artifact_id, total_size, new_total_size))
        if new_total_size > max_size:
            raise ComputeError(
                'Even after cleaning the submission: %d is too large. Before '
                'cleaning: %d, after: %d' %
                (artifact_id, total_size, new_total_size))

    st_acc, sa_acc, bio_acc, ex_acc, run_acc = None, None, None, None, None
    if send:
        # getting aspera's password
        old_ascp_pass = environ.get('ASPERA_SCP_PASS', '')
        if old_ascp_pass == '':
            environ['ASPERA_SCP_PASS'] = qiita_config.ebi_seq_xfer_pass
        ascp_passwd = environ['ASPERA_SCP_PASS']
        LogEntry.create('Runtime',
                        ('Submission of sequences of pre_processed_id: '
                         '%d completed successfully' % artifact_id))

        # step 4: sending sequences
        if action != 'MODIFY':
            LogEntry.create('Runtime',
                            ("Submitting sequences for pre_processed_id: "
                             "%d" % artifact_id))
            for cmd in ebi_submission.generate_send_sequences_cmd():
                stdout, stderr, rv = system_call(cmd)
                if rv != 0:
                    error_msg = ("ASCP Error:\nStd output:%s\nStd error:%s" %
                                 (stdout, stderr))
                    environ['ASPERA_SCP_PASS'] = old_ascp_pass
                    raise ComputeError(error_msg)
                open(ebi_submission.ascp_reply,
                     'a').write('stdout:\n%s\n\nstderr: %s' % (stdout, stderr))
        environ['ASPERA_SCP_PASS'] = old_ascp_pass

        # step 5: sending xml
        xmls_cmds = ebi_submission.generate_curl_command(
            ebi_seq_xfer_pass=ascp_passwd)
        LogEntry.create('Runtime', ("Submitting XMLs for pre_processed_id: "
                                    "%d" % artifact_id))
        xml_content, stderr, rv = system_call(xmls_cmds)
        if rv != 0:
            error_msg = ("Error:\nStd output:%s\nStd error:%s" %
                         (xml_content, stderr))
            raise ComputeError(error_msg)
        else:
            LogEntry.create('Runtime',
                            ('Submission of sequences of pre_processed_id: '
                             '%d completed successfully' % artifact_id))
        open(ebi_submission.curl_reply,
             'w').write('stdout:\n%s\n\nstderr: %s' % (xml_content, stderr))

        # parsing answer / only if adding
        if action == 'ADD' or test:
            try:
                st_acc, sa_acc, bio_acc, ex_acc, run_acc = \
                    ebi_submission.parse_EBI_reply(xml_content, test=test)
            except EBISubmissionError as e:
                error = str(e)
                le = LogEntry.create('Fatal',
                                     "Command: %s\nError: %s\n" %
                                     (xml_content, error),
                                     info={'ebi_submission': artifact_id})
                raise ComputeError("EBI Submission failed! Log id: %d\n%s" %
                                   (le.id, error))

            if st_acc:
                ebi_submission.study.ebi_study_accession = st_acc
            if sa_acc:
                ebi_submission.sample_template.ebi_sample_accessions = sa_acc
            if bio_acc:
                ebi_submission.sample_template.biosample_accessions = bio_acc
            if ex_acc:
                ebi_submission.prep_template.ebi_experiment_accessions = ex_acc
            ebi_submission.artifact.ebi_run_accessions = run_acc

    return st_acc, sa_acc, bio_acc, ex_acc, run_acc
Ejemplo n.º 6
0
def submit_EBI(preprocessed_data_id, action, send):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    """
    # step 1: init and validate
    ebi_submission = EBISubmission(preprocessed_data_id, action)

    # step 2: generate demux fastq files
    ebi_submission.study.ebi_submission_status = 'submitting'
    try:
        ebi_submission.generate_demultiplexed_fastq()
    except:
        error_msg = format_exc()
        if isdir(ebi_submission.full_ebi_dir):
            rmtree(ebi_submission.full_ebi_dir)
        ebi_submission.study.ebi_submission_status = 'failed: %s' % error_msg
        LogEntry.create('Runtime',
                        error_msg,
                        info={'ebi_submission': preprocessed_data_id})
        raise

    # step 3: generate and write xml files
    ebi_submission.generate_xml_files()

    if send:
        # step 4: sending sequences
        if action != 'MODIFY':
            old_ascp_pass = environ.get('ASPERA_SCP_PASS', '')
            environ['ASPERA_SCP_PASS'] = qiita_config.ebi_seq_xfer_pass

            LogEntry.create('Runtime',
                            ("Submitting sequences for pre_processed_id: "
                             "%d" % preprocessed_data_id))
            for cmd in ebi_submission.generate_send_sequences_cmd():
                stdout, stderr, rv = system_call(cmd)
                if rv != 0:
                    error_msg = ("Error:\nStd output:%s\nStd error:%s" %
                                 (stdout, stderr))
                    raise ComputeError(error_msg)
                open(ebi_submission.ascp_reply,
                     'a').write('stdout:\n%s\n\nstderr: %s' % (stdout, stderr))
            environ['ASPERA_SCP_PASS'] = old_ascp_pass
            LogEntry.create(
                'Runtime',
                ('Submission of sequences of pre_processed_id: '
                 '%d completed successfully' % preprocessed_data_id))

        # step 5: sending xml and parsing answer
        xmls_cmds = ebi_submission.generate_curl_command()
        LogEntry.create('Runtime', ("Submitting XMLs for pre_processed_id: "
                                    "%d" % preprocessed_data_id))
        xml_content, stderr, rv = system_call(xmls_cmds)
        if rv != 0:
            error_msg = ("Error:\nStd output:%s\nStd error:%s" %
                         (xml_content, stderr))
            raise ComputeError(error_msg)
        else:
            LogEntry.create(
                'Runtime',
                ('Submission of sequences of pre_processed_id: '
                 '%d completed successfully' % preprocessed_data_id))
        open(ebi_submission.curl_reply,
             'w').write('stdout:\n%s\n\nstderr: %s' % (xml_content, stderr))

        try:
            st_acc, sa_acc, bio_acc, ex_acc, run_acc = \
                ebi_submission.parse_EBI_reply(xml_content)
        except EBISubmissionError as e:
            le = LogEntry.create('Fatal',
                                 "Command: %s\nError: %s\n" %
                                 (xml_content, str(e)),
                                 info={'ebi_submission': preprocessed_data_id})
            ebi_submission.study.ebi_submission_status = (
                "failed: XML parsing, log id: %d" % le.id)
            raise ComputeError("EBI Submission failed! Log id: %d" % le.id)

        ebi_submission.study.ebi_submission_status = 'submitted'
        if action == 'ADD':
            if st_acc:
                ebi_submission.study.ebi_study_accession = st_acc
            if sa_acc:
                ebi_submission.sample_template.ebi_sample_accessions = sa_acc
            if bio_acc:
                ebi_submission.sample_template.biosample_accessions = bio_acc
            if ex_acc:
                ebi_submission.prep_template.ebi_experiment_accessions = ex_acc
            ebi_submission.artifact.ebi_run_accessions = run_acc
    else:
        st_acc, sa_acc, bio_acc, ex_acc, run_acc = None, None, None, None, None

    return st_acc, sa_acc, bio_acc, ex_acc, run_acc