def upload_authorship(self, data_file: List[str], author_ids: dict,
                      article_ids: dict):
    """
    Uploads the authorship (author -> article relationships) to the server.

    Every file is unpickled and iterated; each item must provide an
    ``'author'`` list and an ``'article'`` dict. Relationships are sent in
    batches through ``add_record('relationship', ...)``.

    :param data_file: names of pickle files to read
    :param author_ids: mapping of author to its _id on server
    :param article_ids: mapping of article to its _id on server
    :return: None
    """

    def flush(batch):
        # Send one batch; a failure is logged and the upload carries on.
        response = add_record('relationship', batch)
        if response['_status'] != 'OK':
            self.logger.error(
                'fail to create authorship due to {}'.format(response))

    # create author-article relationship
    relationship = []
    for file in data_file:
        self.logger.info('Process {}'.format(file))
        # NOTE(security): pickle.load can execute arbitrary code; only feed
        # this method files that were produced by a trusted process.
        with open(file, 'rb') as handle:
            data = pickle.load(handle)
        for d in data:
            users = d['author']
            article = d['article']
            if article['ref'] not in article_ids:
                self.logger.warning(
                    'article {} is not found in server'.format(
                        article['ref']))
                continue
            dst_id = article_ids[article['ref']]
            for u in users:
                # find _id of author: the lookup key is the author name plus
                # the first affiliation, or the article name when the author
                # has no affiliation
                affiliation = u['affiliation']
                if affiliation:
                    key = (u['name'], affiliation[0])
                else:
                    key = (u['name'], article['name'])
                key = self.authors.find(key)
                if key not in author_ids:
                    self.logger.warning(
                        'user {} is not found in server'.format(u['name']))
                    continue
                r = create_relationship()
                r['srcId'] = author_ids[key]
                r['dstId'] = dst_id
                r['name'] = 'Author'
                r['type'] = 5
                relationship.append(r)
            # keep the size of a single request bounded
            if len(relationship) > 1000:
                flush(relationship)
                relationship = []
    # send whatever is left after the last full batch
    if relationship:
        flush(relationship)
def upload_to_server(work_directory):
    """
    Push the content of ``japan.xlsx`` found in *work_directory* to the server.

    The 'device' and 'drug' sheets are read row by row (the first row of each
    sheet is skipped). Every row is parsed into a product and an applicant,
    both are uploaded as entities, and then linked with an 'Applicant'
    relationship of type 7. Problems are logged and the row is skipped.

    :param work_directory: directory that contains ``japan.xlsx``
    :return: None
    """

    def normalise(value):
        # strings are cleaned with replace_carriage, empty cells become ''
        if isinstance(value, str):
            return replace_carriage(value)
        if value is None:
            return ''
        return value

    workbook_path = os.path.join(work_directory, 'japan.xlsx')
    book = load_workbook(workbook_path)
    log = create_logger('japan-pmda')
    # timestamp marking the start of the run
    log.critical(datetime.datetime.now())
    for sheet_name in ('device', 'drug'):
        sheet = book.get_sheet_by_name(sheet_name)
        for row_index, row in enumerate(sheet.rows):
            if row_index == 0:
                # first row of the sheet is not data
                continue
            cells = [normalise(cell.value) for cell in row]
            if sheet_name == 'device':
                product, applicant = parse_device(cells)
            else:
                product, applicant = parse_drug(cells)
            if len(product['name']) == 0 or len(applicant['name']) == 0:
                log.warning('invalid record for {}'.format(product['name']))
                continue

            entity_response = add_record('entity', [product, applicant])
            if entity_response['_status'] != 'OK':
                log.error(
                    'fail to create record for {}'.format(product['name']))
                log.error(entity_response)
                continue

            # items come back in upload order: [0] product, [1] applicant
            created = entity_response['_items']
            link = create_relationship(created[1]['_id'], created[0]['_id'])
            link['type'] = 7
            link['name'] = 'Applicant'
            link['abs'] = 'Applicant'
            link_response = add_record('relationship', [link])
            if link_response['_status'] == 'OK':
                log.debug('added {} to the system'.format(product['name']))
            else:
                log.error('fail to create relationship for {}'.format(
                    product['name']))
                log.error(link_response)
    # timestamp marking the end of the run
    log.critical(datetime.datetime.now())
def upload_reference(self, data_file: List[str], article_ids: dict):
    """
    Uploads the reference information for the article.

    Every file is unpickled and iterated; each item must provide an
    ``'article'`` dict and a ``'reference'`` list. Only string entries of
    the reference list are considered. One ``add_record`` request is sent
    per article that has at least one resolvable reference.

    :param data_file: names of pickle files to read
    :param article_ids: a dict mapping of pubmed id of the article to the id
        in the database
    :return: None
    """
    for file in data_file:
        self.logger.info('Process {}'.format(file))
        # NOTE(security): pickle.load can execute arbitrary code; only feed
        # this method files that were produced by a trusted process.
        with open(file, 'rb') as handle:
            data = pickle.load(handle)
        for d in data:
            reference = [r for r in d['reference'] if isinstance(r, str)]
            article = d['article']
            if article['ref'] not in article_ids:
                self.logger.warning(
                    'article {} is not found in server'.format(
                        article['ref']))
                continue
            src_id = article_ids[article['ref']]
            relationship = []
            for u in reference:
                # The referenced article itself must be known to the server;
                # checking the citing article again would let unknown
                # references through and fail with KeyError further down.
                if u not in article_ids:
                    self.logger.warning(
                        'reference {} is not found in server'.format(u))
                    continue
                r = create_relationship()
                r['srcId'] = src_id
                r['dstId'] = article_ids[u]
                r['name'] = 'Reference'
                r['type'] = 14
                relationship.append(r)
            if not relationship:
                # nothing to upload for this article, skip the request
                continue
            response = add_record('relationship', relationship)
            if response['_status'] != 'OK':
                self.logger.error(
                    'fail to create reference relationship for {} due to {}'.
                    format(article['name'], response))
def main():
    """
    Import the records of the local openFDA JSON dump into the server.

    For every entry of ``result['results']`` a product entity and an
    applicant company entity are built and uploaded together; on success
    the two are linked with an 'Applicant' relationship of type 7.
    Failures are logged and the import continues with the next record.

    :return: None
    """
    date_format = "%a, %d %b %Y %H:%M:%S GMT"

    def field(record, name, default=''):
        # Value of `name` taken from the record itself, falling back to its
        # 'openfda' section. A record without that section is tolerated.
        openfda_part = record.get('openfda') or {}
        return record.get(name, openfda_part.get(name, default))

    def format_date(value):
        # Formatted date string, or None when the value is missing or cannot
        # be parsed. Dates are best effort and never abort the record.
        if not value:
            return None
        try:
            return parser.parse(value).strftime(date_format)
        except (ValueError, OverflowError, TypeError):
            log.warning('fail to parse date {!r}'.format(value))
            return None

    product_code = get_product_code()
    log = create_logger('510K')
    source = os.path.expanduser(
        '~/work/fda/device-classification-0001-of-0001.json')
    with open(source, 'r') as handle:
        result = json.load(handle)
    # timestamp marking the start of the run
    log.critical(datetime.datetime.now())
    for r in result['results']:
        openfda = r.get('openfda') or {}

        p = create_product()
        p['name'] = field(r, 'device_name')
        p['ref'] = field(r, 'k_number')
        p['addr']['line1'] = field(r, 'address_1')
        p['addr']['line2'] = field(r, 'address_2')
        p['addr']['city'] = field(r, 'city')
        p['addr']['state'] = field(r, 'state')
        p['addr']['zip'] = field(r, 'zip_code')
        p['addr']['country'] = field(r, 'country_code')
        p['intro'] = field(r, 'statement_or_summary')
        p['asset']['type'] = 0

        # p['tag'] is used for tags readable to common users,
        # p['asset']['lic'] is used for tags specific to the product.
        p['tag'] = [
            field(r, 'advisory_committee_description'),
            field(r, 'medical_specialty_description'),
            'FDA',
            'Medical Device',
            '510K',
        ]
        p['asset']['lic'] = [
            'FDA',
            '510K',
            field(r, 'clearance_type'),
            field(r, 'advisory_committee_description'),
            # read from the 'openfda' section only (unchanged behaviour)
            openfda.get('medical_specialty_description', ''),
            field(r, 'product_code'),
            field(r, 'regulation_number'),
            field(r, 'decision_description'),
        ]
        p['asset']['lic'].extend(third_party(field(r, 'third_party_flag')))
        if field(r, 'expedited_review_flag'):
            p['asset']['lic'].append('Expedited Review')

        # submission types '1' and '2' are not tagged
        submission_id = field(r, 'submission_type_id')
        if submission_id not in {'1', '2'}:
            submission_name = submission_type(submission_id)
            if submission_name is not None:
                p['asset']['lic'].append(submission_name)
                p['tag'].append(submission_name)

        code = product_code.get(field(r, 'product_code'), None)
        if code is not None:
            p['abs'] = code['device_name']
            device_class = 'Class ' + code['device_class']
            # gmp_exempt_flag is 'Y' when the device IS exempt from GMP, so
            # 'N' must map to 'GMP Required' (the labels used to be swapped).
            if code['gmp_exempt_flag'] == 'N':
                gmp_label = 'GMP Required'
            else:
                gmp_label = 'GMP Exempt'
            p['asset']['lic'].extend([device_class, gmp_label])
            p['tag'].append(device_class)
            if code['implant_flag'] != 'N':
                p['asset']['lic'].append('Implant')
                p['tag'].append('Implant')
            if code['life_sustain_support_flag'] != 'N':
                p['asset']['lic'].append('Life Sustain Support')
                p['tag'].append('Life Sustain Support')
        else:
            # unknown product code: fall back to the device name
            p['abs'] = p['name']

        p['asset']['stat'] = map_status(field(r, 'decision_code'))

        # the values set by create_product() stay in place when a date is
        # missing or unparseable
        created = format_date(field(r, 'date_received', None))
        if created is not None:
            p['created'] = created
        updated = format_date(field(r, 'decision_date', None))
        if updated is not None:
            p['updated'] = updated

        p['asset']['lic'] = remove_empty_string_from_array(p['asset']['lic'])
        p['tag'] = remove_empty_string_from_array(p['tag'])

        a = create_company()
        a['name'] = field(r, 'applicant')
        a['abs'] = 'A Medical Device Company'
        a['addr'] = p['addr']
        a['tag'] = p['tag']
        a['group']['parentId'] = '000000000000000000000000'
        # contact is just the name of contact

        response = add_record('entity', [p, a])
        if response['_status'] != 'OK':
            log.error('fail to create record for {}'.format(p['name']))
            log.error(response)
            continue

        # items come back in upload order: [0] product, [1] applicant
        applicant_product = create_relationship(
            response['_items'][1]['_id'], response['_items'][0]['_id'])
        applicant_product['type'] = 7
        applicant_product['name'] = 'Applicant'
        applicant_product['abs'] = 'Applicant'
        response = add_record('relationship', [applicant_product])
        if response['_status'] != 'OK':
            log.error('fail to create relationship for {}'.format(p['name']))
            log.error(response)
        else:
            log.debug('added {} to the system'.format(p['name']))
    # timestamp marking the end of the run
    log.critical(datetime.datetime.now())