Beispiel #1
0
 def test_sra_dump_run_set_xml_biis3(self):
     # Convert the BII-S-3 ISA-JSON fixture into SRA XML inside the temp dir.
     source_json = os.path.join(self._json_data_dir, 'BII-S-3', 'BII-S-3.json')
     with open(source_json) as json_fp:
         json2sra.convert(json_fp, self._tmp_dir, validate_first=False)
     # Parse the generated run_set.xml and compare it with the expected tree
     # held on the test case.
     generated_path = os.path.join(self._tmp_dir, 'run_set.xml')
     with open(generated_path, 'rb') as rs_fp:
         actual_run_set_xml_biis3 = etree.fromstring(rs_fp.read())
     matches = utils.assert_xml_equal(self._expected_run_set_xml_biis3,
                                      actual_run_set_xml_biis3)
     self.assertTrue(matches)
Beispiel #2
0
 def test_sra_dump_submission_xml_biis3(self):
     # Convert the BII-S-3 ISA-JSON fixture into SRA XML inside the temp dir.
     # The input file is opened in a context manager so the handle is closed
     # even when the conversion raises.
     with open(os.path.join(self._json_data_dir, 'BII-S-3',
                            'BII-S-3.json')) as json_fp:
         json2sra.convert(json_fp, self._tmp_dir, validate_first=False)
     # Load the generated submission.xml and compare it against the expected
     # tree held on the test case.
     with open(os.path.join(self._tmp_dir, 'submission.xml'),
               'rb') as sub_fp:
         submission_xml = sub_fp.read()
     actual_submission_xml_biis3 = etree.fromstring(submission_xml)
     self.assertTrue(
         utils.assert_xml_equal(self._expected_submission_xml_biis3,
                                actual_submission_xml_biis3))
Beispiel #3
0
 def test_sra_dump_run_set_xml_biis7(self):
     # Convert the BII-S-7 ISA-JSON fixture into SRA XML inside the temp dir.
     # The input file is opened in a context manager so the handle is closed
     # even when the conversion raises.
     with open(os.path.join(self._json_data_dir, 'BII-S-7',
                            'BII-S-7.json')) as json_fp:
         json2sra.convert(json_fp, self._tmp_dir, validate_first=False)
     # Load the generated run_set.xml and compare it against the expected
     # tree held on the test case.
     with open(os.path.join(self._tmp_dir, 'run_set.xml'), 'rb') as rs_fp:
         run_set_xml = rs_fp.read()
     actual_run_set_xml_biis7 = etree.fromstring(run_set_xml)
     self.assertTrue(
         utils.assert_xml_equal(self._expected_run_set_xml_biis7,
                                actual_run_set_xml_biis7))
Beispiel #4
0
 def test_sra_dump_sample_set_xml_biis7(self):
     # Convert the BII-S-7 ISA-JSON fixture using the test case's default SRA
     # configuration.
     default_settings = self._sra_default_config
     source_json = os.path.join(self._json_data_dir, 'BII-S-7', 'BII-S-7.json')
     with open(source_json) as json_fp:
         json2sra.convert(
             json_fp,
             self._tmp_dir,
             sra_settings=default_settings,
             validate_first=False,
         )
     # Parse the generated sample_set.xml and compare it with the expected
     # tree held on the test case.
     generated_path = os.path.join(self._tmp_dir, 'sample_set.xml')
     with open(generated_path, 'rb') as ss_fp:
         actual_sample_set_xml_biis7 = etree.fromstring(ss_fp.read())
     matches = utils.assert_xml_equal(self._expected_sample_set_xml_biis7,
                                      actual_sample_set_xml_biis7)
     self.assertTrue(matches)
Beispiel #5
0
    def _convert_to_sra(self):
        """Convert the stored ISA-JSON file to SRA XML.

        Calls ``self.validate_isajson()`` first, then feeds
        ``<self.json_path>/isa_json.json`` to ``json2sra.convert`` with
        ``self.xml_path`` as the output path, and finally records the
        ``converted_to_sra`` status in ``self.context``.
        """
        self.validate_isajson()

        lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
        sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get(
            "properties", dict())

        datafilehashes = self.collated_records["datafilehashes"]

        # Use a context manager so the JSON file handle is released even if
        # the conversion raises.
        with open(os.path.join(self.json_path, 'isa_json.json')) as json_fp:
            json2sra.convert(json_fp=json_fp,
                             path=self.xml_path,
                             sra_settings=sra_settings,
                             datafilehashes=datafilehashes,
                             validate_first=False)

        self.context["ena_status"] = "converted_to_sra"
Beispiel #6
0
def convert(source_path, dest_path, sra_settings=None, validate_first=True):
    """Convert ISA-Tab at ``source_path`` to SRA files under ``dest_path``.

    The ISA-Tab input is first converted to ISA-JSON, which is handed to
    ``json2sra.convert`` through an in-memory file. When ``dest_path`` is a
    directory afterwards, it is zipped into an in-memory buffer.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the ZIP archive, or
        ``None`` when ``dest_path`` is not a directory after the conversion.
    """
    log.info("Converting ISA-Tab to JSON for %s", source_path)
    converted = isatab2json.convert(source_path, validate_first=validate_first)

    log.debug("Writing JSON to memory file")
    json_stream = StringIO(json.dumps(converted))
    json_stream.name = "BII-S-3.json"

    log.info("Converting JSON to SRA, writing to %s", dest_path)
    log.info("Using SRA settings %s", sra_settings)
    json2sra.convert(
        json_stream,
        dest_path,
        sra_settings=sra_settings,
        validate_first=False,
    )
    log.info("Conversion from ISA-Tab to SRA complete")

    # Nothing to archive when the converter did not leave a directory behind.
    if not os.path.isdir(dest_path):
        return None

    archive = BytesIO()
    log.info("Zipping SRA files")
    with ZipFile(archive, 'w') as zip_file:
        zipdir(dest_path, zip_file)
        log.debug("Zipped %s", zip_file.namelist())
    # Rewind so the caller can read the archive from the beginning.
    archive.seek(0)
    log.info("Returning zipped files as memory file")
    return archive
Beispiel #7
0
 def test_sra_dump_file_set(self):
     # Convert the BII-S-3 ISA-JSON fixture into SRA XML inside the temp dir.
     with open(os.path.join(self._json_data_dir, 'BII-S-3',
                            'BII-S-3.json')) as json_fp:
         json2sra.convert(json_fp, self._tmp_dir, validate_first=False)
     # The output directory must contain exactly experiment_set.xml,
     # run_set.xml, sample_set.xml, project_set.xml and submission.xml.
     expected_sra_path = self._tmp_dir
     expected_file_set = {
         'experiment_set.xml', 'run_set.xml', 'sample_set.xml',
         'project_set.xml', 'submission.xml'
     }
     # Fail loudly instead of passing vacuously when nothing was written.
     if not os.path.exists(expected_sra_path):
         self.fail("SRA output directory does not exist: " +
                   str(expected_sra_path))
     actual_file_set = set(os.listdir(expected_sra_path))
     extra_files_found = actual_file_set - expected_file_set
     if extra_files_found:
         self.fail("Unexpected file found in SRA output: " +
                   str(extra_files_found))
     expected_files_missing = expected_file_set - actual_file_set
     if expected_files_missing:
         # Distinct message so a missing file is not reported as an extra one.
         self.fail("Expected file missing from SRA output: " +
                   str(expected_files_missing))
Beispiel #8
0
def convert(source_path, dest_path, sra_settings=None, validate_first=True):
    """Convert ISA-Tab at ``source_path`` to SRA files under ``dest_path``.

    The ISA-Tab input is converted to ISA-JSON, which is passed to
    ``json2sra.convert`` through an in-memory file. When ``dest_path`` is a
    directory afterwards, its contents are zipped into an in-memory buffer.
    ``dest_path`` itself is not removed by this function.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the ZIP archive, or
        ``None`` when ``dest_path`` is not a directory after the conversion.
    """
    isa_json = isatab2json.convert(source_path, validate_first=validate_first)
    isa_json_fp = StringIO(json.dumps(isa_json))
    isa_json_fp.name = "BII-S-3.json"
    json2sra.convert(isa_json_fp,
                     dest_path,
                     sra_settings=sra_settings,
                     validate_first=False)
    logging.info("Conversion complete...")
    buffer = BytesIO()
    if os.path.isdir(dest_path):
        with ZipFile(buffer, 'w') as zip_file:
            zipdir(dest_path, zip_file)
            # Report the archive members through logging rather than stdout.
            logging.debug("Zipped %s", zip_file.namelist())

        # Rewind so the caller can read the archive from the beginning.
        buffer.seek(0)
        return buffer
    return None
Beispiel #9
0
    def post(self):
        """Convert an uploaded ZIP archive to SRA XML and return it zipped.

        The request body must have mimetype ``application/zip``. It is written
        to a temporary directory and extracted; the first archive member is
        opened and passed to ``json2sra.convert``, and the files found under
        ``<target>/sra/<member name without extension>`` are returned as a new
        in-memory ZIP.

        Returns:
            The ``send_file`` response on success, a 415 response when a
            ``TypeError`` is raised (including the wrong-mimetype case), and
            a 500 response for any other ``Exception``.
        """
        response = Response(status=500)
        # Create temporary directories for the upload and the SRA output.
        tmp_dir = _create_temp_dir()
        target_tmp_dir = _create_temp_dir()
        try:
            if tmp_dir is None:
                # The None value must not be concatenated into the message:
                # str + None raises TypeError, which is reported as 415 below.
                raise IOError("Could not create temporary directory")
            if not request.mimetype == "application/zip":
                raise TypeError("Incorrect media type received. Got " + request.mimetype +
                                ", expected application/zip")

            # Write request data to file
            file_path = _write_request_data(request, tmp_dir, 'isatab.zip')
            if file_path is None:
                raise IOError("Could not create temporary file")

            # Setup path to configuration
            config_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'isaconfig-default')
            with zipfile.ZipFile(file_path, 'r') as z:
                # Extract the uploaded archive; its first member is the input.
                z.extractall(tmp_dir)
                first_member = z.filelist[0].filename
                src_file_path = os.path.normpath(os.path.join(tmp_dir, first_member))
                # Close the input handle as soon as the conversion finishes.
                with open(src_file_path) as src_fp:
                    json2sra.convert(src_fp, target_tmp_dir, config_dir)
                sra_dir = target_tmp_dir + '/sra/' + os.path.splitext(first_member)[0]
                memf = io.BytesIO()
                with zipfile.ZipFile(memf, 'w') as zf:
                    for name in os.listdir(sra_dir):
                        zf.write(os.path.join(sra_dir, name), name)
                memf.seek(0)
                response = send_file(memf, mimetype='application/zip')
        except TypeError:
            response = Response(status=415)
        except Exception:
            response = Response(status=500)
        finally:
            # Clean up only; returning from here would swallow exceptions
            # such as KeyboardInterrupt.
            for directory in (tmp_dir, target_tmp_dir):
                if directory is not None:
                    shutil.rmtree(directory, ignore_errors=True)
        return response
Beispiel #10
0
    def do_seq_reads_submission(self, sub_id, remote_path, transfer_token):
        """Build ISA-JSON for a submission, convert it to SRA XML and send it to ENA.

        Steps, in order:
          1. Dump the schema from ``cnv.Investigation`` to
             ``<self._dir>/<sub_id>/json/isa_json.json``.
          2. Run ``isajson.validate`` on that file and log the result.
          3. Convert it with ``json2sra.convert`` into ``<self._dir>/<sub_id>``.
          4. POST the generated XML files to the ENA drop-box with ``curl``.
          5. Pass the curl output to ``self.get_accessions``.

        Args:
            sub_id: Submission identifier; used as directory name and token.
            remote_path: Path joined in front of the project, sample,
                experiment and run XML file paths for the upload.
            transfer_token: Forwarded unchanged to ``self.get_accessions``.

        Returns:
            ``True`` once all steps have completed.

        Raises:
            subprocess.CalledProcessError: If curl exits with a non-zero status.
        """
        # Set up paths for the conversion directories.
        conv_dir = os.path.join(self._dir, sub_id)
        os.makedirs(os.path.join(conv_dir, 'json'), exist_ok=True)
        json_file_path = os.path.join(conv_dir, 'json', 'isa_json.json')
        xml_dir = conv_dir

        # Convert COPO JSON to ISA-JSON and dump the metadata to the output file.
        conv = cnv.Investigation(submission_token=sub_id)
        meta = conv.get_schema()
        with open(json_file_path, 'w') as json_file:
            json_file.write(dumps(meta))

        # Validate ISA_JSON
        lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)

        with open(json_file_path) as json_file:
            v = isajson.validate(json_file)
            lg.log(v, level=Loglvl.INFO, type=Logtype.FILE)

        # convert to SRA with isatools converter
        lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
        sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get(
            "properties", dict())
        datafilehashes = conv.get_datafilehashes()
        # Context manager so the handle is released even if conversion fails.
        with open(json_file_path) as json_fp:
            json2sra.convert(json_fp=json_fp,
                             path=conv_dir,
                             sra_settings=sra_settings,
                             datafilehashes=datafilehashes,
                             validate_first=False)

        # finally submit to SRA
        lg.log('Submitting XMLS to ENA via CURL',
               level=Loglvl.INFO,
               type=Logtype.FILE)
        submission_file = os.path.join(xml_dir, 'submission.xml')
        project_file = os.path.join(xml_dir, 'project_set.xml')
        sample_file = os.path.join(xml_dir, 'sample_set.xml')
        experiment_file = os.path.join(xml_dir, 'experiment_set.xml')
        run_file = os.path.join(xml_dir, 'run_set.xml')

        pass_word = resolve_env.get_env('WEBIN_USER_PASSWORD')
        user_token = resolve_env.get_env('WEBIN_USER')
        ena_service = resolve_env.get_env('ENA_SERVICE')
        user_token = user_token.split("@")[0]
        # NOTE(review): the credentials travel in the URL query string and
        # "-k" below disables TLS certificate verification; both are kept to
        # preserve behaviour but should be revisited.
        ena_uri = "{ena_service!s}/ena/submit/drop-box/submit/?auth=ENA%20{user_token!s}%20{pass_word!s}".format(
            ena_service=ena_service,
            user_token=user_token,
            pass_word=pass_word)

        # Pass curl an argument list (no shell) so that characters in sub_id,
        # remote_path or the credentials cannot be interpreted as shell syntax.
        curl_cmd = [
            'curl', '-k',
            '-F', 'SUBMISSION=@' + submission_file,
            '-F', 'PROJECT=@' + os.path.join(remote_path, project_file),
            '-F', 'SAMPLE=@' + os.path.join(remote_path, sample_file),
            '-F', 'EXPERIMENT=@' + os.path.join(remote_path, experiment_file),
            '-F', 'RUN=@' + os.path.join(remote_path, run_file),
            ena_uri,
        ]

        output = subprocess.check_output(curl_cmd)
        lg.log(output, level=Loglvl.INFO, type=Logtype.FILE)
        lg.log("Extracting fields from receipt",
               level=Loglvl.INFO,
               type=Logtype.FILE)

        # The return value is not used here; the call is kept for its effects.
        self.get_accessions(output, sub_id, transfer_token)

        return True