def test_sra_dump_run_set_xml_biis3(self):
    """Convert BII-S-3 ISA-JSON to SRA and check the generated run_set.xml.

    The XML written to the temporary directory is parsed and compared
    with the expected run set XML held on the test case.
    """
    source_json = os.path.join(self._json_data_dir, 'BII-S-3', 'BII-S-3.json')
    with open(source_json) as isa_json:
        json2sra.convert(isa_json, self._tmp_dir, validate_first=False)

    # Load the SRA output that was just written and compare it against
    # the expected output from the test data directory.
    generated_path = os.path.join(self._tmp_dir, 'run_set.xml')
    with open(generated_path, 'rb') as xml_fp:
        actual_tree = etree.fromstring(xml_fp.read())
        matches = utils.assert_xml_equal(
            self._expected_run_set_xml_biis3, actual_tree)
        self.assertTrue(matches)
def test_sra_dump_submission_xml_biis3(self):
    """Convert BII-S-3 ISA-JSON to SRA and check the generated submission.xml.

    Both the input JSON and the generated XML are opened with context
    managers so the file handles are closed even if an assertion fails.
    """
    json_path = os.path.join(self._json_data_dir, 'BII-S-3', 'BII-S-3.json')
    with open(json_path) as json_fp:
        json2sra.convert(json_fp, self._tmp_dir, validate_first=False)
    # Now try load the SRA output in test and compare against the expected
    # output in test data directory
    with open(os.path.join(self._tmp_dir, 'submission.xml'), 'rb') as sub_fp:
        submission_xml = sub_fp.read()
    actual_submission_xml_biis3 = etree.fromstring(submission_xml)
    self.assertTrue(
        utils.assert_xml_equal(self._expected_submission_xml_biis3,
                               actual_submission_xml_biis3))
def test_sra_dump_run_set_xml_biis7(self):
    """Convert BII-S-7 ISA-JSON to SRA and check the generated run_set.xml.

    Both the input JSON and the generated XML are opened with context
    managers so the file handles are closed even if an assertion fails.
    """
    json_path = os.path.join(self._json_data_dir, 'BII-S-7', 'BII-S-7.json')
    with open(json_path) as json_fp:
        json2sra.convert(json_fp, self._tmp_dir, validate_first=False)
    # Now try load the SRA output in test and compare against the expected
    # output in test data directory
    with open(os.path.join(self._tmp_dir, 'run_set.xml'), 'rb') as rs_fp:
        run_set_xml = rs_fp.read()
    actual_run_set_xml_biis7 = etree.fromstring(run_set_xml)
    self.assertTrue(
        utils.assert_xml_equal(self._expected_run_set_xml_biis7,
                               actual_run_set_xml_biis7))
def test_sra_dump_sample_set_xml_biis7(self):
    """Convert BII-S-7 ISA-JSON to SRA and check the generated sample_set.xml.

    The conversion is run with the test case's default SRA configuration,
    and the resulting XML is compared with the expected sample set XML.
    """
    sra_settings = self._sra_default_config
    source_json = os.path.join(self._json_data_dir, 'BII-S-7', 'BII-S-7.json')
    with open(source_json) as isa_json:
        json2sra.convert(
            isa_json,
            self._tmp_dir,
            sra_settings=sra_settings,
            validate_first=False,
        )

    # Load the SRA output that was just written and compare it against
    # the expected output from the test data directory.
    generated_path = os.path.join(self._tmp_dir, 'sample_set.xml')
    with open(generated_path, 'rb') as xml_fp:
        actual_tree = etree.fromstring(xml_fp.read())
        matches = utils.assert_xml_equal(
            self._expected_sample_set_xml_biis7, actual_tree)
        self.assertTrue(matches)
def _convert_to_sra(self):
    """Validate the ISA-JSON, convert it to SRA XML and record the new status.

    Reads ``isa_json.json`` from ``self.json_path`` and writes the SRA
    output to ``self.xml_path``. On completion ``self.context["ena_status"]``
    is set to ``"converted_to_sra"``.
    """
    self.validate_isajson()
    lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
    sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get(
        "properties", dict())
    datafilehashes = self.collated_records["datafilehashes"]
    # Use a context manager so the ISA-JSON handle is closed once the
    # converter has finished with it (it was previously leaked).
    with open(os.path.join(self.json_path, 'isa_json.json')) as json_fp:
        json2sra.convert(json_fp=json_fp,
                         path=self.xml_path,
                         sra_settings=sra_settings,
                         datafilehashes=datafilehashes,
                         validate_first=False)
    self.context["ena_status"] = "converted_to_sra"
    return
def convert(source_path, dest_path, sra_settings=None, validate_first=True):
    """Convert ISA-Tab at *source_path* to SRA XML and return it zipped.

    The ISA-Tab is first converted to ISA-JSON in memory, then handed to
    the JSON-to-SRA converter, which writes into *dest_path*. If
    *dest_path* is a directory afterwards, its contents are zipped into an
    in-memory buffer.

    :param source_path: location of the ISA-Tab input
    :param dest_path: directory the SRA files are written to
    :param sra_settings: optional settings passed through to json2sra
    :param validate_first: whether isatab2json validates before converting
    :return: a BytesIO positioned at the start of the zip data
    """
    # NOTE(review): the buffer is returned at function level, so a
    # non-directory dest_path yields an empty buffer - confirm against
    # the intended layout of this function.
    log.info("Converting ISA-Tab to JSON for %s", source_path)
    isa_json = isatab2json.convert(source_path, validate_first=validate_first)

    log.debug("Writing JSON to memory file")
    json_text = json.dumps(isa_json)
    isa_json_fp = StringIO(json_text)
    # The in-memory file is given a fixed name before being passed on.
    isa_json_fp.name = "BII-S-3.json"

    log.info("Converting JSON to SRA, writing to %s", dest_path)
    log.info("Using SRA settings %s", sra_settings)
    json2sra.convert(
        isa_json_fp,
        dest_path,
        sra_settings=sra_settings,
        validate_first=False,
    )
    log.info("Conversion from ISA-Tab to SRA complete")

    buffer = BytesIO()
    if os.path.isdir(dest_path):
        log.info("Zipping SRA files")
        with ZipFile(buffer, 'w') as zip_file:
            zipdir(dest_path, zip_file)
            log.debug("Zipped %s", zip_file.namelist())
        # Rewind so the caller reads the archive from its first byte.
        buffer.seek(0)
        log.info("Returning zipped files as memory file")
    return buffer
def test_sra_dump_file_set(self):
    """Convert BII-S-3 ISA-JSON to SRA and check the set of files produced.

    Fails if the output directory contains any file outside the expected
    set, or if any expected file is missing.
    """
    with open(os.path.join(self._json_data_dir, 'BII-S-3',
                           'BII-S-3.json')) as json_fp:
        json2sra.convert(json_fp, self._tmp_dir, validate_first=False)
    # SRA should always produce experiment_set.xml, run_set.xml,
    # sample_set.xml, project_set.xml and submission.xml
    expected_sra_path = os.path.join(self._tmp_dir)
    expected_file_set = {
        'experiment_set.xml',
        'run_set.xml',
        'sample_set.xml',
        'project_set.xml',
        'submission.xml',
    }
    # NOTE(review): if the output directory does not exist the checks
    # below are skipped and the test passes; confirm that is intended.
    if os.path.exists(expected_sra_path):
        actual_file_set = set(os.listdir(expected_sra_path))
        extra_files_found = actual_file_set - expected_file_set
        if extra_files_found:
            self.fail("Unexpected file found in SRA output: "
                      + str(extra_files_found))
        expected_files_missing = expected_file_set - actual_file_set
        if expected_files_missing:
            # This branch used to reuse the "Unexpected file found"
            # message, which misdescribed the failure.
            self.fail("Expected file missing from SRA output: "
                      + str(expected_files_missing))
def convert(source_path, dest_path, sra_settings=None, validate_first=True):
    """Convert ISA-Tab at *source_path* to SRA XML and return it zipped.

    The ISA-Tab is converted to ISA-JSON in memory, then handed to the
    JSON-to-SRA converter, which writes into *dest_path*. If *dest_path*
    is a directory afterwards, its contents are zipped into an in-memory
    buffer. *dest_path* itself is left in place; this function does not
    remove it.

    :param source_path: location of the ISA-Tab input
    :param dest_path: directory the SRA files are written to
    :param sra_settings: optional settings passed through to json2sra
    :param validate_first: whether isatab2json validates before converting
    :return: a BytesIO positioned at the start of the zip data
    """
    # NOTE(review): the buffer is returned at function level, so a
    # non-directory dest_path yields an empty buffer - confirm against
    # the intended layout of this function.
    isa_json = isatab2json.convert(source_path, validate_first=validate_first)
    isa_json_fp = StringIO(json.dumps(isa_json))
    # The in-memory file is given a fixed name before being passed on.
    isa_json_fp.name = "BII-S-3.json"
    json2sra.convert(isa_json_fp, dest_path, sra_settings=sra_settings,
                     validate_first=False)
    logging.info("Conversion complete...")
    buffer = BytesIO()
    if os.path.isdir(dest_path):
        with ZipFile(buffer, 'w') as zip_file:
            # use relative dir_name to avoid absolute path on file names
            zipdir(dest_path, zip_file)
            # Report the archive contents through logging rather than
            # printing to stdout from library code.
            logging.debug("Zipped %s", zip_file.namelist())
        # Rewind so the caller reads the archive from its first byte.
        buffer.seek(0)
    return buffer
def post(self):
    """Convert an uploaded zip containing ISA-JSON to a zip of SRA XML files.

    The request body is written to a temporary directory and extracted;
    the first archive member is passed to ``json2sra.convert``. The files
    found under ``<target>/sra/<member name without extension>`` are then
    zipped in memory and sent back.

    :return: the zip as ``application/zip`` on success; status 415 when
        the request mimetype is not ``application/zip``; status 500 on
        any other failure.
    """
    response = Response(status=500)
    # Create temporary directories: one for the upload, one for the output
    tmp_dir = _create_temp_dir()
    target_tmp_dir = _create_temp_dir()
    try:
        # Do not concatenate the None value into the message: that raised
        # TypeError, which was then misreported as 415.
        if tmp_dir is None or target_tmp_dir is None:
            raise IOError("Could not create temporary directory")
        if request.mimetype != "application/zip":
            # Answer 415 directly rather than signalling through TypeError,
            # so unrelated TypeErrors from the conversion stay a 500.
            return Response(status=415)
        # Write request data to file
        file_path = _write_request_data(request, tmp_dir, 'isatab.zip')
        if file_path is None:
            raise IOError("Could not create temporary file")
        # Setup path to configuration
        config_dir = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'isaconfig-default')
        with zipfile.ZipFile(file_path, 'r') as z:
            # extract ISArchive files
            z.extractall(tmp_dir)
            first_member = z.filelist[0].filename
        src_file_path = os.path.normpath(os.path.join(tmp_dir, first_member))
        # find just the combined JSON; close the handle once converted
        with open(src_file_path) as json_fp:
            json2sra.convert(json_fp, target_tmp_dir, config_dir)
        sub_path = os.path.splitext(first_member)[0]
        sra_dir = target_tmp_dir + '/sra/' + sub_path
        memf = io.BytesIO()
        with zipfile.ZipFile(memf, 'w') as zf:
            for entry in os.listdir(sra_dir):
                # Store each file under its bare name, not its full path.
                zf.write(os.path.join(sra_dir, entry), entry)
        memf.seek(0)
        response = send_file(memf, mimetype='application/zip')
    except Exception:
        response = Response(status=500)
    finally:
        # Skip directories that were never created; rmtree(None) is not
        # guaranteed to be swallowed by ignore_errors.
        for directory in (tmp_dir, target_tmp_dir):
            if directory is not None:
                shutil.rmtree(directory, ignore_errors=True)
    return response
def do_seq_reads_submission(self, sub_id, remote_path, transfer_token):
    """Build SRA XML for a submission and post it to ENA with curl.

    Steps: dump the COPO metadata as ISA-JSON under
    ``<self._dir>/<sub_id>/json``, validate it, convert it to SRA XML in
    ``<self._dir>/<sub_id>``, submit the XML files with curl, then pass
    the curl output to ``self.get_accessions``.

    :param sub_id: submission identifier; also used as the working
        directory name and as the submission token
    :param remote_path: path joined in front of the project, sample,
        experiment and run XML file paths
    :param transfer_token: passed through to ``self.get_accessions``
    :return: True
    :raises subprocess.CalledProcessError: if curl exits non-zero
    """
    # setup paths for conversion directories
    conv_dir = os.path.join(self._dir, sub_id)
    json_dir = os.path.join(conv_dir, 'json')
    os.makedirs(json_dir, exist_ok=True)
    json_file_path = os.path.join(json_dir, 'isa_json.json')
    xml_dir = conv_dir

    # Convert COPO JSON to ISA JSON
    lg.log('Obtaining ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
    conv = cnv.Investigation(submission_token=sub_id)
    meta = conv.get_schema()
    # dump metadata to output file
    with open(json_file_path, 'w') as json_file:
        json_file.write(dumps(meta))

    # Validate ISA_JSON
    lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
    with open(json_file_path) as json_file:
        v = isajson.validate(json_file)
        lg.log(v, level=Loglvl.INFO, type=Logtype.FILE)

    # convert to SRA with isatools converter
    lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
    sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get(
        "properties", dict())
    datafilehashes = conv.get_datafilehashes()
    # Close the ISA-JSON handle once the converter is done with it.
    with open(json_file_path) as json_fp:
        json2sra.convert(json_fp=json_fp,
                         path=conv_dir,
                         sra_settings=sra_settings,
                         datafilehashes=datafilehashes,
                         validate_first=False)

    # finally submit to SRA
    lg.log('Submitting XMLS to ENA via CURL', level=Loglvl.INFO,
           type=Logtype.FILE)
    submission_file = os.path.join(xml_dir, 'submission.xml')
    project_file = os.path.join(xml_dir, 'project_set.xml')
    sample_file = os.path.join(xml_dir, 'sample_set.xml')
    experiment_file = os.path.join(xml_dir, 'experiment_set.xml')
    run_file = os.path.join(xml_dir, 'run_set.xml')

    pass_word = resolve_env.get_env('WEBIN_USER_PASSWORD')
    user_token = resolve_env.get_env('WEBIN_USER')
    ena_service = resolve_env.get_env('ENA_SERVICE')
    # Only the part of the user name before "@" goes into the auth string.
    user_token = user_token.split("@")[0]
    ena_uri = "{ena_service!s}/ena/submit/drop-box/submit/?auth=ENA%20{user_token!s}%20{pass_word!s}".format(
        ena_service=ena_service, user_token=user_token, pass_word=pass_word)

    # Pass curl an argument list instead of a shell string so paths and
    # credentials are never interpreted by a shell.
    # NOTE(review): -k disables TLS certificate verification; confirm this
    # is still required for the ENA endpoint.
    curl_cmd = [
        'curl', '-k',
        '-F', 'SUBMISSION=@' + submission_file,
        '-F', 'PROJECT=@' + os.path.join(remote_path, project_file),
        '-F', 'SAMPLE=@' + os.path.join(remote_path, sample_file),
        '-F', 'EXPERIMENT=@' + os.path.join(remote_path, experiment_file),
        '-F', 'RUN=@' + os.path.join(remote_path, run_file),
        ena_uri,
    ]
    output = subprocess.check_output(curl_cmd)
    lg.log(output, level=Loglvl.INFO, type=Logtype.FILE)
    lg.log("Extracting fields from receipt", level=Loglvl.INFO,
           type=Logtype.FILE)
    self.get_accessions(output, sub_id, transfer_token)
    return True