def submit_physical_measurements(self, participant_id, site):
    """
    Create a physical measurements response for the participant.
    :param participant_id: participant id
    :param site: HPOSiteGen object
    :return: PhysicalMeasurementsGen object if the POST request is successful,
             otherwise a ValueError is raised.
    """
    if not self._pm_gen:
        self._pm_gen = PhysicalMeasurementsGen()

    pm_obj = self._pm_gen.new(participant_id, site)

    data = dict()
    data['api'] = 'Participant/{0}/PhysicalMeasurements'.format(participant_id)
    data['data'] = pm_obj.make_fhir_document()
    # make the submit time a little later than the authored timestamp.
    data['timestamp'] = clock.CLOCK.now().isoformat()

    code, resp = make_api_request(self._host, self._gen_url, req_type='POST',
                                  json_data=data, headers=gcp_make_auth_header())
    if code == 200:
        pm_obj.update(resp)
        return pm_obj

    raise ValueError(
        'invalid response, failed to create physical measurements response '
        '[Http {0}: {1}].'.format(code, resp))
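# A minimal usage sketch (hypothetical, not part of this module): assumes `gen`
# is an instance of this class and that the participant object returned by
# create_participant() below exposes its id (the attribute name here is an
# assumption).
#
#   p_obj, hpo_site = gen.create_participant()
#   pm_obj = gen.submit_physical_measurements(p_obj.participant_id, hpo_site)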
def _download_questions(self, module):
    """
    Download the fhir questionnaire from the rdr service.
    :param module: questionnaire module name
    :return: fhir questionnaire object, or None on failure
    """
    data = dict()
    data['api'] = 'Questionnaire?concept={0}'.format(module)
    data['timestamp'] = clock.CLOCK.now().isoformat()
    data['method'] = 'GET'

    code, resp = make_api_request(self._rdr_host, self._gen_url, req_type='POST',
                                  json_data=data, headers=gcp_make_auth_header())
    if code != 200:
        _logger.error('failed to get module questionnaire [Http {0}: {1}].'.format(code, resp))
        return None

    questions = Questionnaire(resp, strict=False)
    if questions:
        return questions

    _logger.error('failed to parse the module questionnaire data.')
    return None
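# Note: the generator endpoint presumably replays this payload as a GET against
# the RDR API, e.g. something like the following (module name is an example):
#
#   GET /rdr/v1/Questionnaire?concept=TheBasics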
def submit_module_response(self, module_id, participant_id, overrides=None):
    """
    Create a questionnaire response for the given module.
    :param module_id: questionnaire module name
    :param participant_id: participant id
    :param overrides: list of tuples giving answers to specific questions.
    :return: QuestionnaireGen object if the POST request is successful, otherwise None.
    """
    if not module_id or not isinstance(module_id, str):
        raise ValueError('invalid module id.')
    # participant ids may arrive as ints or strings, so only check that a value was given.
    if not participant_id:
        raise ValueError('invalid participant id.')

    if not self._cb:
        # We only want to create these once, because they download data from github.
        self._cb = CodeBook()
        self._qn_gen = QuestionnaireGen(self._cb, self._host)

    qn_obj = self._qn_gen.new(module_id, participant_id, overrides)

    data = dict()
    data['api'] = 'Participant/{0}/QuestionnaireResponse'.format(participant_id)
    data['data'] = qn_obj.make_fhir_document()
    # make the submit time a little later than the authored timestamp.
    data['timestamp'] = clock.CLOCK.now().isoformat()

    code, resp = make_api_request(self._host, self._gen_url, req_type='POST',
                                  json_data=data, headers=gcp_make_auth_header())
    if code == 200:
        qn_obj.update(resp)
        return qn_obj

    _logger.error('module response failure: [Http {0}: {1}].'.format(code, resp))
    return None
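# A hedged example of submitting a module response with answer overrides. The
# module name and the (question, answer) pair are illustrative only; real codes
# depend on the codebook loaded by CodeBook().
#
#   qn_obj = gen.submit_module_response(
#       'TheBasics', participant_id,
#       overrides=[('example_question_code', 'example_answer_code')])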
def submit_biobank_order(self, participant_id, sample_test, site, to_mayo=False):
    """
    Create a biobank order response for the participant.
    :param participant_id: participant id
    :param sample_test: sample test code
    :param site: HPOSiteGen object
    :param to_mayo: if True, also send order to Mayolink.
    :return: BioBankOrderGen object if the POST request is successful, otherwise None.
    """
    if not sample_test:
        return None

    if not self._bio_gen:
        self._bio_gen = BioBankOrderGen()

    bio_obj = self._bio_gen.new(participant_id, sample_test, site)

    data = dict()
    data['api'] = 'Participant/{0}/BiobankOrder'.format(participant_id)
    data['data'], finalized = bio_obj.make_fhir_document()
    # make the submit time a little later than the finalized timestamp.
    data['timestamp'] = self._increment_date(finalized, minute_range=15).isoformat()
    data['mayolink'] = to_mayo

    code, resp = make_api_request(self._host, self._gen_url, req_type='POST',
                                  json_data=data, headers=gcp_make_auth_header())
    if code == 200:
        bio_obj.update(resp)
        return bio_obj

    _logger.error('biobank order response failure: [Http {0}: {1}].'.format(code, resp))
    return None
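# Sketch of ordering a single sample and mirroring the order to Mayolink. The
# sample test code '1ED04' is only an example; valid codes come from the
# BioBankOrderGen configuration.
#
#   bio_obj = gen.submit_biobank_order(participant_id, '1ED04', hpo_site,
#                                      to_mayo=True)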
def generate_data_from_file(self):
    reader = self._read_csv_lines(self.args.create_samples_from_file)
    _logger.info('requesting pm&b for participants')
    for item in reader:
        code, resp = make_api_request(self._host, self._gen_url, req_type='POST',
                                      json_data=item, headers=gcp_make_auth_header())
        if code != 200:
            _logger.error('request failed [Http {0}: {1}].'.format(code, resp))
def generate_fake_data(self):
    total_participants_created = 0
    while total_participants_created < self.args.num_participants:
        participants_for_batch = min(self.MAX_PARTICIPANTS_PER_REQUEST,
                                     self.args.num_participants - total_participants_created)
        request_body = {'num_participants': participants_for_batch,
                        'include_physical_measurements': self.args.include_physical_measurements,
                        'include_biobank_orders': self.args.include_biobank_orders}
        if self.args.hpo:
            request_body['hpo'] = self.args.hpo

        _logger.info('generating batch of [{0}] participants.'.format(participants_for_batch))

        num_consecutive_errors = 0
        while num_consecutive_errors <= self.MAX_CONSECUTIVE_ERRORS:
            code, resp = make_api_request(self._host, self._gen_url, req_type='POST',
                                          json_data=request_body, headers=gcp_make_auth_header())
            if code == 200:
                break
            _logger.error('{0} [{1}]'.format(code, resp))
            num_consecutive_errors += 1
            sleep(self.SLEEP_TIME_AFTER_ERROR_SECONDS)

        if num_consecutive_errors > self.MAX_CONSECUTIVE_ERRORS:
            raise IOError("more than {0} consecutive errors; bailing out.".format(
                self.MAX_CONSECUTIVE_ERRORS))

        total_participants_created += participants_for_batch
        _logger.info('total participants created: [{0}].'.format(total_participants_created))

    if self.args.create_biobank_samples:
        _logger.info('requesting Biobank sample generation.')
        code, resp = make_api_request(self._host, self._gen_url, req_type='POST',
                                      json_data={'create_biobank_samples': True},
                                      headers=gcp_make_auth_header())
        if code != 200:
            _logger.error('request to generate biobank samples failed.')
        else:
            _logger.info('biobank samples are being generated asynchronously.'
                         ' wait until done, then use the cron tab in AppEngine to start'
                         ' the samples pipeline.')
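# For reference, each batch POST built above carries a JSON body shaped like
# this (values are examples; 'hpo' is only present when --hpo was given):
#
#   {
#     "num_participants": 50,
#     "include_physical_measurements": true,
#     "include_biobank_orders": true,
#     "hpo": "PITT"
#   }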
def create_participant(self, site_id=None, hpo_id=None):
    """
    Create a new participant with a random or specific hpo or site id.
    :param site_id: name of specific hpo site
    :param hpo_id: name of hpo
    :return: tuple of (participant object, HPOSiteGen object)
    """
    hpo_site = None
    hpo_gen = HPOGen()

    if site_id:
        # if site_id is given, it also returns the HPO the site is matched with.
        hpo_site = hpo_gen.get_site(site_id)
    if hpo_id and not hpo_site:
        # if hpo is given, select a random site within the hpo.
        hpo_site = hpo_gen.get_hpo(hpo_id).get_random_site()
    if not hpo_site:
        # choose a random hpo and site.
        hpo_site = hpo_gen.get_random_site()

    # initialize participant generator.
    if not self._p_gen:
        self._p_gen = ParticipantGen()

    # make a new participant.
    p_obj = self._p_gen.new(hpo_site)

    data = dict()
    data['api'] = 'Participant'
    data['data'] = p_obj.to_dict()
    data['timestamp'] = clock.CLOCK.now().isoformat()

    code, resp = make_api_request(self._host, self._gen_url, req_type='POST',
                                  json_data=data, headers=gcp_make_auth_header())
    if code == 200 and resp:
        p_obj.update(resp)
        return p_obj, hpo_site

    raise ValueError(
        'invalid response, failed to create participant [Http {0}: {1}].'.format(code, resp))
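# The three site-selection branches above resolve in priority order: explicit
# site, then a random site within a given hpo, then fully random. A sketch of
# each call shape (site/hpo names are examples):
#
#   p_obj, site = gen.create_participant(site_id='hpo-site-a')
#   p_obj, site = gen.create_participant(hpo_id='PITT')
#   p_obj, site = gen.create_participant()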
def run(self):
    """
    Main program process
    :return: Exit code value
    """
    # TODO: Future: these two blocks of code should be replaced by Tanner's new
    #       config bucket for sites csv files.
    # Copy the bucket config file to a temp file
    _logger.info('retrieving configuration...')
    tmpfile = os.path.join(tempfile.gettempdir(), next(tempfile._get_candidate_names()))
    gcp_cp(HPO_REPORT_CONFIG_GCS_PATH, tmpfile)

    # Read the tempfile and get the sheet_id from it.
    config = json.loads(open(tmpfile).read())
    sheet_id = config.get('hpo_report_google_sheet_id')
    # delete tempfile
    os.remove(tmpfile)

    # download the sheet in csv format
    _logger.info('retrieving sites config...')
    sheet_url = 'spreadsheets/d/{0}/export?format=csv'.format(sheet_id)
    resp_code, resp_data = make_api_request('docs.google.com', sheet_url, ret_type='text')
    if resp_code != 200:
        _logger.error(resp_data)
        _logger.error('failed to retrieve site information, aborting.')
        return 1
    # TODO end

    # Load the site info we need into a dict.
    sites = dict()
    handle = StringIO.StringIO(resp_data)
    sites_reader = csv.DictReader(handle)
    for row in sites_reader:
        if row['Org ID']:
            sites[row['Org ID']] = {'aggregating_org_id': row['Aggregating Org ID'],
                                    'bucket_name': row['Bucket Name']}

    _logger.info('retrieving db configuration...')
    headers = gcp_make_auth_header()
    resp_code, resp_data = make_api_request(
        '{0}.appspot.com'.format(self.gcp_env.project), '/rdr/v1/Config/db_config',
        headers=headers)
    if resp_code != 200:
        _logger.error(resp_data)
        _logger.error('failed to retrieve config, aborting.')
        return 1

    passwd = resp_data['rdr_db_password']
    if not passwd:
        _logger.error('failed to retrieve database user password from config.')
        return 1

    # connect a sql proxy to the current project
    _logger.info('starting google sql proxy...')
    port = random.randint(10000, 65535)
    instances = gcp_format_sql_instance(self.gcp_env.project, port=port)
    proxy_pid = self.gcp_env.activate_sql_proxy(instances)
    if not proxy_pid:
        _logger.error('activating google sql proxy failed.')
        return 1

    try:
        _logger.info('connecting to mysql instance...')
        sql_conn = MySQLdb.connect(host='127.0.0.1', user='******', passwd=str(passwd),
                                   db='rdr', port=port)
        cursor = sql_conn.cursor()

        _logger.info('retrieving participant information...')
        # get record count
        if self.args.org_id:
            cursor.execute(COUNT_SQL, (self.args.org_id,))
        else:
            sql = COUNT_SQL.replace('where organization.external_id = %s', 'where')
            cursor.execute(sql)
        rec = cursor.fetchone()
        total_recs = rec[0]

        if self.args.org_id:
            cursor.execute(PARTICIPANT_SQL, (self.args.org_id,))
        else:
            sql = PARTICIPANT_SQL.replace('where organization.external_id = %s', 'where')
            cursor.execute(sql)

        _logger.info('transferring files to destinations...')
        count = 0
        rec = cursor.fetchone()
        while rec:
            if not self.args.debug:
                print_progress_bar(count, total_recs,
                                   prefix='{0}/{1}:'.format(count, total_recs),
                                   suffix='complete')

            p_id = rec[0]
            site = rec[1]
            if self.args.destination_bucket is not None:
                # override destination bucket lookup (the lookup table is incomplete)
                bucket = self.args.destination_bucket
            else:
                site_info = sites.get(rec[2])
                if not site_info:
                    _logger.warn('\nsite info not found for [{0}].'.format(rec[2]))
                    # advance to the next record before skipping, to avoid looping forever.
                    rec = cursor.fetchone()
                    continue
                bucket = site_info.get('bucket_name')
                if not bucket:
                    _logger.warn('\nno bucket name found for [{0}].'.format(rec[2]))
                    rec = cursor.fetchone()
                    continue

            src_bucket = SOURCE_BUCKET.format(p_id=p_id)
            dest_bucket = DEST_BUCKET.format(bucket_name=bucket,
                                             org_external_id=self.args.org_id,
                                             site_name=site if site else 'no-site-assigned',
                                             p_id=p_id)

            _logger.debug('\n Participant: {0}'.format(p_id))
            _logger.debug('  src: {0}'.format(src_bucket))
            _logger.debug(' dest: {0}'.format(dest_bucket))

            if not self.args.dry_run:
                # gsutil -m cp -r -n gs://src/ gs://dest
                gcp_cp(src_bucket, dest_bucket, args='-r', flags='-m')

            count += 1
            rec = cursor.fetchone()

        # print progress bar one more time to show completed.
        if not rec and not self.args.debug:
            print_progress_bar(count, total_recs,
                               prefix='{0}/{1}:'.format(count, total_recs),
                               suffix='complete')

        cursor.close()
        sql_conn.close()
    except MySQLdb.OperationalError as e:
        _logger.error('failed to connect to {0} mysql instance. [{1}]'.format(
            self.gcp_env.project, e))

    return 0