Beispiel #1
0
 def test_sra_dump_submission_xml_biis7(self):
     """Convert the BII-S-7 ISA-JSON to SRA and check the generated submission.xml.

     The conversion is run with the fixture's default SRA settings and written to
     ``self._tmp_dir``; the resulting ``submission.xml`` is parsed and compared with
     the tree held in ``self._expected_submission_xml_biis7``.
     """
     sra_settings = self._sra_default_config
     json_path = os.path.join(self._json_data_dir, 'BII-S-7', 'BII-S-7.json')
     # Use context managers so the file handles are closed even if conversion or parsing fails
     with open(json_path) as json_fp:
         json2sra.convert2(json_fp, self._tmp_dir, sra_settings=sra_settings, validate_first=False)
     # Load the SRA output that was just written and compare it against the expected XML
     with open(os.path.join(self._tmp_dir, 'submission.xml'), 'rb') as xml_fp:
         submission_xml = xml_fp.read()
     actual_submission_xml_biis7 = etree.fromstring(submission_xml)
     self.assertTrue(utils.assert_xml_equal(self._expected_submission_xml_biis7, actual_submission_xml_biis7))
Beispiel #2
0
 def test_sra_dump_file_set(self):
     """Convert the BII-S-3 ISA-JSON to SRA and check exactly which files were written.

     Fails if the output directory contains any file outside the expected set, or if
     any file of the expected set is absent.
     """
     json_path = os.path.join(self._json_data_dir, 'BII-S-3', 'BII-S-3.json')
     # Close the input file deterministically instead of leaving it to the garbage collector
     with open(json_path) as json_fp:
         json2sra.convert2(json_fp, self._tmp_dir, validate_first=False)
     # SRA should always produce experiment_set.xml, run_set.xml, sample_set.xml, project_set.xml and submission.xml
     expected_sra_path = os.path.join(self._tmp_dir)
     expected_file_set = {'experiment_set.xml', 'run_set.xml', 'sample_set.xml', 'project_set.xml', 'submission.xml'}
     # NOTE(review): if the output directory does not exist the test passes without checking anything
     if os.path.exists(expected_sra_path):
         actual_file_set = set(os.listdir(expected_sra_path))
         extra_files_found = actual_file_set - expected_file_set
         if extra_files_found:
             self.fail("Unexpected file found in SRA output: " + str(extra_files_found))
         expected_files_missing = expected_file_set - actual_file_set
         if expected_files_missing:
             # Distinct message so a missing file is not reported as an unexpected one
             self.fail("Expected file missing from SRA output: " + str(expected_files_missing))
Beispiel #3
0
def convert(source_path, dest_path, sra_settings=None, validate_first=True):
    """Convert the input at ``source_path`` to SRA XML and return it as an in-memory ZIP.

    The input is first converted to ISA-JSON with ``isatab2json.convert`` and then
    handed to ``json2sra.convert2``, which writes into ``dest_path``.

    :param source_path: location passed to ``isatab2json.convert``
    :param dest_path: directory passed to ``json2sra.convert2`` as the output path
    :param sra_settings: forwarded unchanged to ``json2sra.convert2``
    :param validate_first: forwarded to ``isatab2json.convert`` only; the SRA step
        always runs with ``validate_first=False``
    :return: a ``BytesIO`` rewound to position 0 holding the ZIP of ``dest_path``,
        or ``None`` when ``dest_path`` is not a directory after the conversion
    """
    from isatools.convert import isatab2json, json2sra

    isa_json = isatab2json.convert(source_path, validate_first=validate_first)

    # convert2 is given a file-like object, so wrap the serialized JSON in a text stream;
    # the stream is given a name attribute (presumably read by convert2 - TODO confirm)
    json_stream = StringIO(json.dumps(isa_json))
    json_stream.name = "BII-S-3.json"
    json2sra.convert2(json_stream, dest_path, sra_settings=sra_settings, validate_first=False)
    logging.info("Conversion complete...")

    # Nothing to archive when the converter produced no output directory
    if not os.path.isdir(dest_path):
        return None

    archive = BytesIO()
    with ZipFile(archive, 'w') as zip_file:
        zipdir(dest_path, zip_file)
        print(zip_file.namelist())

    # Rewind so the caller can read the archive from the start
    archive.seek(0)
    return archive
def convert(source_path, dest_path, sra_settings=None, validate_first=True):
    """Run the two-step conversion to SRA XML and return the output zipped in memory.

    Step one calls ``isatab2json.convert`` on ``source_path``; step two feeds the
    resulting ISA-JSON to ``json2sra.convert2``, which writes into ``dest_path``.

    :param source_path: input location for ``isatab2json.convert``
    :param dest_path: output path given to ``json2sra.convert2``
    :param sra_settings: passed through to ``json2sra.convert2``
    :param validate_first: applies to the first step only; the second step is always
        called with ``validate_first=False``
    :return: ``BytesIO`` positioned at 0 containing the ZIP archive of ``dest_path``;
        ``None`` if ``dest_path`` is not a directory once conversion has finished
    """
    from isatools.convert import isatab2json, json2sra

    serialized = json.dumps(isatab2json.convert(source_path, validate_first=validate_first))
    isa_json_stream = StringIO(serialized)
    isa_json_stream.name = "BII-S-3.json"
    json2sra.convert2(
        isa_json_stream,
        dest_path,
        sra_settings=sra_settings,
        validate_first=False,
    )
    logging.info("Conversion complete...")

    zipped = BytesIO()
    if os.path.isdir(dest_path):
        with ZipFile(zipped, 'w') as archive:
            zipdir(dest_path, archive)
            print(archive.namelist())
        # The archive is finalized once the with-block exits; rewind for the reader
        zipped.seek(0)
        return zipped
    return None
Beispiel #5
0
    def _do_aspera_transfer(self, transfer_token=None, user_name=None, password=None, remote_path=None, file_path=None,
                            path2library=None, sub_id=None):
        """Upload the data files with Aspera, build the SRA XML and submit it to ENA.

        Steps, in order:

        1. Unless ``Submission().isComplete(sub_id)`` reports completion, spawn ``./ascp``
           from ``path2library`` and push every progress line it prints to
           ``RemoteDataFile().update_transfer``.
        2. Write the ISA-JSON obtained from ``cnv.Investigation`` below
           ``self._dir/<sub_id>/json``, validate it and convert it to SRA XML with
           ``json2sra.convert2``.
        3. Post the XML files with ``curl`` and store the accessions found in the receipt
           on the submission record, marking it complete.

        :param transfer_token: identifier handed to ``RemoteDataFile().update_transfer``
            and printed with the progress percentage
        :param user_name: account part of the ``user_name:remote_path`` ascp target
        :param password: sent to the spawned ascp process after waiting for its prompt
        :param remote_path: path part of the ascp target; also joined in front of the
            XML paths in the curl form fields
        :param file_path: iterable of file path strings to transfer
        :param path2library: directory the process changes into before running ``./ascp``
            (the working directory is not restored afterwards)
        :param sub_id: submission id used for status lookups, record updates and as the
            name of the conversion directory
        :return: ``None`` normally; the value of ``redirect(...)`` to the error view if
            the transfer section raises ``OSError``
        """

        # check submission status
        submission_status = Submission().isComplete(sub_id)

        if not submission_status or submission_status == 'false':

            lg.log('Starting aspera transfer', level=Loglvl.INFO, type=Logtype.FILE)

            kwargs = dict(target_id=sub_id, commenced_on=str(datetime.now()))
            Submission().save_record(dict(), **kwargs)

            # k counts the files seen so far in the progress output (first file -> 0)
            k = -1
            # every path is preceded by a single space, exactly as the command line expects
            f_str = ''.join(' ' + f for f in file_path)
            cmd = "./ascp -d -QT -l300M -L- {f_str!s} {user_name!s}:{remote_path!s}".format(
                f_str=f_str, user_name=user_name, remote_path=remote_path)
            lg.log(cmd, level=Loglvl.INFO, type=Logtype.FILE)
            # './ascp' is resolved relative to the current directory, so switch to the library dir
            os.chdir(path2library)

            try:
                thread = pexpect.spawn(cmd, timeout=None)
                thread.expect(["assword:", pexpect.EOF])
                thread.sendline(password)

                cpl = thread.compile_pattern_list([pexpect.EOF, '(.+)'])

                # loop-invariant lookup tables
                tokens_to_match = ["Mb/s"]
                units_to_match = ["KB", "MB"]
                time_units = ['d', 'h', 'm', 's']
                # must survive across output lines, otherwise every line looks like a new file
                prev_file = ''

                while True:
                    i = thread.expect_list(cpl, timeout=None)
                    if i == 0:  # EOF! Possible error point if encountered before transfer completion
                        print("Process termination - check exit status!")
                        break
                    elif i == 1:
                        line = thread.match.group(1).decode("utf-8")
                        end_of_transfer = False

                        # only lines carrying a transfer rate are treated as progress reports
                        if all(tm in line for tm in tokens_to_match):
                            fields = {
                                "transfer_status": "transferring",
                                "current_time": datetime.now().strftime("%d-%m-%Y %H:%M:%S")
                            }

                            tokens = line.split(" ")

                            for token in tokens:
                                if token == '':
                                    continue
                                if "file" in token:
                                    fields['file_path'] = token.split('=')[-1]
                                    if prev_file != fields['file_path']:
                                        k = k + 1
                                    # remember the file so repeated lines for it are not recounted
                                    prev_file = fields['file_path']
                                elif '%' in token:
                                    pct = float((token.rstrip("%")))
                                    # pct = (1/len(file_path) * pct) + (k * 1/len(file_path) * 100)
                                    fields['pct_completed'] = pct
                                    print(str(transfer_token) + ":  " + str(pct) + '% transfered')
                                    # flag end of transfer (compare the parsed number, not the raw text)
                                    if pct == 100:
                                        end_of_transfer = True
                                elif any(um in token for um in units_to_match):
                                    fields['amt_transferred'] = token
                                elif "Mb/s" in token or "Mbps" in token:
                                    # drop the 4-character unit suffix
                                    t = token[:-4]
                                    if '=' in t:
                                        fields['transfer_rate'] = t[t.find('=') + 1:]
                                    else:
                                        fields['transfer_rate'] = t
                                elif "status" in token:
                                    fields['transfer_status'] = token.split('=')[-1]
                                elif "rate" in token:
                                    fields['transfer_rate'] = token.split('=')[-1]
                                elif "elapsed" in token:
                                    fields['elapsed_time'] = token.split('=')[-1]
                                elif "loss" in token:
                                    fields['bytes_lost'] = token.split('=')[-1]
                                elif "size" in token:
                                    fields['file_size_bytes'] = token.split('=')[-1]
                                elif "ETA" in token:
                                    # the ETA value is taken from the second-to-last token of the line
                                    eta = tokens[-2]
                                    estimated_completion = ""
                                    eta_split = eta.split(":")
                                    # align the colon-separated parts with the smallest time units
                                    t_u = time_units[-len(eta_split):]
                                    for indx, eta_token in enumerate(eta_split):
                                        if eta_token == "00":
                                            continue
                                        estimated_completion += eta_token + t_u[indx] + " "
                                    fields['estimated_completion'] = estimated_completion
                            RemoteDataFile().update_transfer(transfer_token, fields)

                kwargs = dict(target_id=sub_id, completed_on=datetime.now())
                Submission().save_record(dict(), **kwargs)
                # close thread
                thread.close()
                lg.log('Aspera Transfer completed', level=Loglvl.INFO, type=Logtype.FILE)

            except OSError:
                return redirect('web.apps.web_copo.views.goto_error', request=HttpRequest(),
                                message='There appears to be an issue with EBI.')

        # setup paths for conversion directories
        conv_dir = os.path.join(self._dir, sub_id)
        os.makedirs(os.path.join(conv_dir, 'json'), exist_ok=True)
        json_file_path = os.path.join(conv_dir, 'json', 'isa_json.json')

        #  Convert COPO JSON to ISA JSON
        lg.log('Obtaining ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
        conv = cnv.Investigation(submission_token=sub_id)
        meta = conv.get_schema()
        # dump metadata to output file; the context manager closes it even if dumps() raises
        with open(json_file_path, '+w') as json_file:
            json_file.write(dumps(meta))

        # Validate ISA_JSON
        lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
        with open(json_file_path) as json_file:
            v = isajson.validate(json_file)
            lg.log(v, level=Loglvl.INFO, type=Logtype.FILE)

        # convert to SRA with isatools converter
        lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
        sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get("properties", dict())
        datafilehashes = conv.get_datafilehashes()
        with open(json_file_path) as json_fp:
            json2sra.convert2(json_fp=json_fp, path=conv_dir, sra_settings=sra_settings,
                              datafilehashes=datafilehashes, validate_first=False)

        # finally submit to SRA
        lg.log('Submitting XMLS to ENA via CURL', level=Loglvl.INFO, type=Logtype.FILE)
        submission_file = os.path.join(conv_dir, 'submission.xml')
        project_file = os.path.join(conv_dir, 'project_set.xml')
        sample_file = os.path.join(conv_dir, 'sample_set.xml')
        experiment_file = os.path.join(conv_dir, 'experiment_set.xml')
        run_file = os.path.join(conv_dir, 'run_set.xml')

        # Argument list instead of a shell string: paths are passed verbatim and never shell-interpreted.
        # NOTE(review): the account credentials are hard-coded in this URL - move them to configuration.
        curl_cmd = [
            'curl', '-k',
            '-F', 'SUBMISSION=@' + submission_file,
            '-F', 'PROJECT=@' + os.path.join(remote_path, project_file),
            '-F', 'SAMPLE=@' + os.path.join(remote_path, sample_file),
            '-F', 'EXPERIMENT=@' + os.path.join(remote_path, experiment_file),
            '-F', 'RUN=@' + os.path.join(remote_path, run_file),
            'https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA%20Webin-39233%20Apple123',
        ]

        output = subprocess.check_output(curl_cmd)
        lg.log(output, level=Loglvl.INFO, type=Logtype.FILE)
        lg.log("Extracting fields from receipt", level=Loglvl.INFO, type=Logtype.FILE)

        xml = ET.fromstring(output)

        accessions = dict()

        # project, experiment, submission and run each contribute the first matching receipt element
        single_elements = (
            ('project', './PROJECT'),
            ('experiment', './EXPERIMENT'),
            ('submission', './SUBMISSION'),
            ('run', './RUN'),
        )
        for key, tag in single_elements:
            element = xml.find(tag)
            accessions[key] = {
                'accession': element.get('accession', default='undefined'),
                'alias': element.get('alias', default='undefined'),
            }

        # get sample accessions
        sample_accessions = list()
        for sample in xml.findall('./SAMPLE'):
            entry = {
                'sample_accession': sample.get('accession', default='undefined'),
                'sample_alias': sample.get('alias', default='undefined'),
            }
            # the accession of the last child element of the sample wins
            for bio_s in sample:
                entry['biosample_accession'] = bio_s.get('accession', default='undefined')
            sample_accessions.append(entry)
        accessions['sample'] = sample_accessions

        # save accessions to mongo profile record
        record = Submission().get_record(sub_id)
        record['accessions'] = accessions
        record['complete'] = True
        record['target_id'] = str(record.pop('_id'))
        Submission().save_record(dict(), **record)
Beispiel #6
0
 def test_sra_dump_run_set_xml_biis3(self):
     """Convert the BII-S-3 ISA-JSON to SRA and check the generated run_set.xml.

     The output is written to ``self._tmp_dir``; the resulting ``run_set.xml`` is parsed
     and compared with the tree held in ``self._expected_run_set_xml_biis3``.
     """
     json_path = os.path.join(self._json_data_dir, 'BII-S-3', 'BII-S-3.json')
     # Use context managers so the file handles are closed even if conversion or parsing fails
     with open(json_path) as json_fp:
         json2sra.convert2(json_fp, self._tmp_dir, validate_first=False)
     # Load the SRA output that was just written and compare it against the expected XML
     with open(os.path.join(self._tmp_dir, 'run_set.xml'), 'rb') as xml_fp:
         run_set_xml = xml_fp.read()
     actual_run_set_xml_biis3 = etree.fromstring(run_set_xml)
     self.assertTrue(utils.assert_xml_equal(self._expected_run_set_xml_biis3, actual_run_set_xml_biis3))