def test_parse_qc_table_fastqc(summary, data, quality_metric_fastqc_schema):
    meta = fastqc_utils.parse_qc_table([summary, data],
                                       url='test_url',
                                       qc_schema=quality_metric_fastqc_schema)
    assert meta['Total Sequences'] == 557747
    assert meta['Kmer Content'] == 'WARN'
    assert meta['url'] == 'test_url'
    assert meta['overall_quality_status'] == 'PASS'
def test_parse_qc_table_pairsqc(pairsqc_summary, quality_metric_pairsqc_schema):
    meta = fastqc_utils.parse_qc_table([pairsqc_summary],
                                       url='test_url',
                                       qc_schema=quality_metric_pairsqc_schema)
    assert meta['Total reads'] == 651962
    assert meta['Cis/Trans ratio'] == 64.141
    assert meta['convergence'] == 'Good'
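# --- illustration only, not part of the module ---
# A minimal sketch of the contract the two tests above imply, NOT the real
# fastqc_utils.parse_qc_table: each input is the text of a tab-separated QC
# file, the schema's 'properties' dict whitelists which fields to keep (and
# declares their types), and the result carries the report url plus an
# overall quality status. The column-order handling and the PASS/FAIL
# heuristic below are assumptions made for the sketch.
def parse_qc_table_sketch(datafiles, url, qc_schema):
    meta = {'url': url}
    for filedata in datafiles:
        for line in filedata.splitlines():
            fields = line.strip().split('\t')
            if len(fields) < 2:
                continue
            # FastQC's summary.txt puts the status first ("WARN\tKmer Content\t...");
            # fastqc_data.txt puts the field name first ("Total Sequences\t557747").
            if fields[0] in ('PASS', 'WARN', 'FAIL'):
                key, value = fields[1], fields[0]
            else:
                key, value = fields[0], fields[1]
            if key in qc_schema:
                # coerce numbers according to the schema type, if declared
                if qc_schema[key].get('type') == 'integer':
                    value = int(value)
                elif qc_schema[key].get('type') == 'number':
                    value = float(value)
                meta[key] = value
    # assumed heuristic: FAIL if any per-module status failed, else PASS
    statuses = [v for v in meta.values() if v in ('PASS', 'WARN', 'FAIL')]
    meta['overall_quality_status'] = 'FAIL' if 'FAIL' in statuses else 'PASS'
    return meta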
def _qc_updater(status, awsemfile, ff_meta, tibanna,
                quality_metric='quality_metric_fastqc',
                file_argument='input_fastq',
                report_html=None, datafiles=None, zipped=True,
                datajson_argument=None, other_fields=None):
    """Post a quality_metric item parsed from a workflow's QC output and
    link it to the original file and the workflow run.
    Relies on names imported elsewhere in the module: json (stdlib),
    ff_utils, parse_qc_table and printlog.
    """
    # the data-json argument itself is handled separately; skip it here
    if datajson_argument == awsemfile.argument_name:
        return
    # avoid using [] as a default argument
    if datafiles is None:
        datafiles = ['summary.txt', 'fastqc_data.txt']
    if status == 'uploading':
        # wait until the upload is finished
        return
    # keys
    ff_key = tibanna.ff_keys
    accession = awsemfile.runner.get_file_accessions(file_argument)[0]
    zipped_report = awsemfile.key
    files_to_parse = datafiles
    if report_html:
        files_to_parse.append(report_html)
    printlog("accession is %s" % accession)
    jsondata = dict()
    if zipped:
        # extract the QC files from the zipped report and re-upload them publicly
        try:
            files = awsemfile.s3.unzip_s3_to_s3(zipped_report, accession,
                                                files_to_parse, acl='public-read')
        except Exception as e:
            printlog(tibanna.s3.__dict__)
            raise Exception("%s (key=%s)\n" % (e, zipped_report))
        printlog("files : %s" % str(files))
        filedata = [files[_]['data'] for _ in datafiles]
    else:
        # QC outputs are individual S3 objects rather than a zip archive
        if datajson_argument:
            datajson_keys = awsemfile.runner.get_file_key(datajson_argument)
            jsondata0 = [json.loads(awsemfile.s3.read_s3(_)) for _ in datajson_keys]
            for d in jsondata0:
                jsondata.update(d)
        filedata = [awsemfile.s3.read_s3(_) for _ in datafiles]
        reportdata = awsemfile.s3.read_s3(report_html)
        report_html = accession + 'qc_report.html'
        awsemfile.s3.s3_put(reportdata, report_html, acl='public-read')
        qc_url = 'https://s3.amazonaws.com/' + awsemfile.bucket + '/' + report_html
        files = {report_html: {'data': reportdata, 's3key': qc_url}}
    # get the schema; no need to check_queue here
    qc_schema = ff_utils.get_metadata("profiles/" + quality_metric + ".json",
                                      key=ff_key, ff_env=tibanna.env)
    # parse QC metadata from the extracted files
    if report_html in files:
        qc_url = files[report_html]['s3key']
    else:
        qc_url = None
    meta = parse_qc_table(filedata, qc_schema=qc_schema.get('properties'), url=qc_url)
    if jsondata:
        meta.update(jsondata)
    # merge in any custom fields
    if other_fields:
        meta.update(other_fields)
    printlog("qc meta is %s" % meta)
    # post the QC metadata
    qc_meta = ff_utils.post_metadata(meta, quality_metric, key=ff_key)
    if qc_meta.get('@graph'):
        qc_meta = qc_meta['@graph'][0]
    printlog("qc_meta is %s" % qc_meta)
    # patch the original file to point at the new quality_metric
    try:
        original_file = ff_utils.get_metadata(accession, key=ff_key,
                                              ff_env=tibanna.env,
                                              add_on='frame=object',
                                              check_queue=True)
        printlog("original_file is %s" % original_file)
    except Exception as e:
        raise Exception("Couldn't get metadata for accession %s : %s" % (accession, e))
    patch_file = {'quality_metric': qc_meta['@id']}
    try:
        ff_utils.patch_metadata(patch_file, original_file['uuid'], key=ff_key)
    except Exception as e:
        raise Exception("patch_metadata failed in _qc_updater. %s original_file=%s\n"
                        % (e, original_file))
    # patch the workflow run; value_qc is used to make drawing graphs easier
    output_files = ff_meta.output_files
    output_files[0]['value_qc'] = qc_meta['@id']
    retval = {'output_files': output_files}
    printlog("retval is %s" % retval)
    return retval
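# --- illustration only, not part of the module ---
# Shape of the `files` mapping that _qc_updater expects back from
# awsemfile.s3.unzip_s3_to_s3, inferred from the indexing above
# (files[name]['data'] and files[name]['s3key']); the real return value may
# carry more keys, and the URL layout shown here is a guess.
files_example = {
    'summary.txt': {
        'data': 'PASS\tBasic Statistics\t4DNFIXXXXXXX.fastq.gz\n',  # raw file content
        's3key': 'https://s3.amazonaws.com/<bucket>/<accession>/summary.txt',
    },
    'fastqc_data.txt': {
        'data': 'Total Sequences\t557747\n',
        's3key': 'https://s3.amazonaws.com/<bucket>/<accession>/fastqc_data.txt',
    },
}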
def _qc_updater(status, awsemfile, ff_meta, tibanna,
                quality_metric='quality_metric_fastqc',
                file_argument='input_fastq',
                report_html=None, datafiles=None):
    """Variant of _qc_updater that handles only zipped reports and also
    returns output_quality_metrics alongside output_files.
    Relies on names imported elsewhere in the module: LOG, ff_utils
    and parse_qc_table.
    """
    # avoid using [] as a default argument
    if datafiles is None:
        datafiles = ['summary.txt', 'fastqc_data.txt']
    if status == 'uploading':
        # wait until the upload is finished
        return
    # keys
    ff_key = tibanna.ff_keys
    accession = awsemfile.runner.all_file_accessions[file_argument]
    zipped_report = awsemfile.key
    files_to_parse = datafiles
    if report_html:
        files_to_parse.append(report_html)
    LOG.info("accession is %s" % accession)
    # extract the QC files from the zipped report and re-upload them publicly
    try:
        files = awsemfile.s3.unzip_s3_to_s3(zipped_report, accession,
                                            files_to_parse, acl='public-read')
    except Exception as e:
        LOG.info(tibanna.s3.__dict__)
        raise Exception("%s (key=%s)\n" % (e, zipped_report))
    # get the schema; no need to check_queue here
    qc_schema = ff_utils.get_metadata("profiles/" + quality_metric + ".json",
                                      key=ff_key, ff_env=tibanna.env)
    # parse QC metadata from the extracted files
    LOG.info("files : %s" % str(files))
    filedata = [files[_]['data'] for _ in datafiles]
    if report_html in files:
        qc_url = files[report_html]['s3key']
    else:
        qc_url = None
    meta = parse_qc_table(filedata, qc_schema=qc_schema.get('properties'), url=qc_url)
    LOG.info("qc meta is %s" % meta)
    # post the QC metadata
    qc_meta = ff_utils.post_metadata(meta, quality_metric, key=ff_key)
    if qc_meta.get('@graph'):
        qc_meta = qc_meta['@graph'][0]
    LOG.info("qc_meta is %s" % qc_meta)
    # patch the original file to point at the new quality_metric
    try:
        original_file = ff_utils.get_metadata(accession, key=ff_key,
                                              ff_env=tibanna.env,
                                              add_on='frame=object',
                                              check_queue=True)
        LOG.info("original_file is %s" % original_file)
    except Exception as e:
        raise Exception("Couldn't get metadata for accession %s : %s" % (accession, e))
    patch_file = {'quality_metric': qc_meta['@id']}
    try:
        ff_utils.patch_metadata(patch_file, original_file['uuid'], key=ff_key)
    except Exception as e:
        raise Exception("patch_metadata failed in _qc_updater. %s original_file=%s\n"
                        % (e, original_file))
    # patch the workflow run; value_qc is used to make drawing graphs easier
    output_files = ff_meta.output_files
    output_files[0]['value_qc'] = qc_meta['@id']
    retval = {
        "output_quality_metrics": [{
            "name": quality_metric,
            "value": qc_meta['@id']
        }],
        'output_files': output_files
    }
    LOG.info("retval is %s" % retval)
    return retval
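# --- illustration only, not part of the module ---
# Hypothetical thin wrappers showing how _qc_updater's keyword arguments
# specialize it per QC tool. The wrapper names, the pairsqc file_argument,
# report filename, and datafile names are assumptions for illustration,
# not taken from the source above.
def fastqc_updater(status, awsemfile, ff_meta, tibanna):
    # FastQC is the default: quality_metric_fastqc on input_fastq,
    # parsing summary.txt and fastqc_data.txt out of the zipped report
    return _qc_updater(status, awsemfile, ff_meta, tibanna)

def pairsqc_updater(status, awsemfile, ff_meta, tibanna):
    return _qc_updater(status, awsemfile, ff_meta, tibanna,
                       quality_metric='quality_metric_pairsqc',
                       file_argument='input_pairs',           # assumed argument name
                       report_html='pairsqc_report.html',     # assumed report file
                       datafiles=['sample.summary.out'])      # assumed data file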