コード例 #1
0
ファイル: pubmed.py プロジェクト: zhangtemplar/Button-Data
    def upload_authorship(self, data_file: List[str], author_ids: dict,
                          article_ids: dict):
        """
        Uploads the authorship (author -> article relationships) to the server.

        For every article found in ``article_ids``, one relationship named
        'Author' (type 5) is created per author whose key resolves to an
        entry in ``author_ids``. Relationships are sent with
        ``add_record('relationship', ...)`` in batches.

        :param data_file: names of the pickled data files to read
        :param author_ids: mapping of author key to its _id on server
        :param article_ids: mapping of article ref to its _id on server
        :return: None
        """
        def flush(batch):
            # Send one batch; a rejected batch is logged and then dropped.
            response = add_record('relationship', batch)
            if response['_status'] != 'OK':
                self.logger.error(
                    'fail to create authorship due to {}'.format(response))

        # create author-article relationship
        relationship = []
        for file in data_file:
            self.logger.info('Process {}'.format(file))
            # NOTE: pickle must only be used on trusted files; the context
            # manager makes sure the handle is closed after loading.
            with open(file, 'rb') as handle:
                data = pickle.load(handle)
            for d in data:
                users = d['author']
                article = d['article']
                if article['ref'] not in article_ids:
                    self.logger.warning(
                        'article {} is not found in server'.format(
                            article['ref']))
                    continue
                article_id = article_ids[article['ref']]
                for u in users:
                    # find _id of author: the lookup key is the name plus the
                    # first affiliation, or the article name when the author
                    # has no affiliation
                    affiliation = u['affiliation']
                    if affiliation:
                        key = (u['name'], affiliation[0])
                    else:
                        key = (u['name'], article['name'])
                    key = self.authors.find(key)
                    if key not in author_ids:
                        self.logger.warning(
                            'user {} is not found in server'.format(u['name']))
                        continue
                    r = create_relationship()
                    r['srcId'] = author_ids[key]
                    r['dstId'] = article_id
                    r['name'] = 'Author'
                    r['type'] = 5
                    relationship.append(r)
                # the size is checked once per article, so a batch may exceed
                # 1000 by the number of authors of the last article
                if len(relationship) > 1000:
                    flush(relationship)
                    relationship = []
        if relationship:
            flush(relationship)
コード例 #2
0
ファイル: pubmed.py プロジェクト: zhangtemplar/Button-Data
    def upload_author(self, author_impact: dict) -> dict:
        """
        Upload the unique authors to the database.

        Authors come from ``self.authors.all_elements()`` and are sent with
        ``add_record('entity', ...)`` in batches of 1000. The resulting id
        mapping is also pickled to ``pubmed_author_ids.cp``.

        :param author_impact: author citation data, keyed like the authors;
            each entry is read for its 'citation' and 'keyword' values
        :return: a dictionary using (name, abs) of the author as key and its _id in database as value
        """
        user_ids = {}

        def flush(batch):
            # Upload one batch and record the ids the server assigned.
            response = add_record('entity', batch)
            if response['_status'] != 'OK':
                # The original message had no placeholder, so nothing useful
                # was ever logged; report the server response instead.
                self.logger.error(
                    'fail to create user due to {}'.format(response))
                return
            for u, r in zip(batch, response['_items']):
                user_ids[(u['name'], u['abs'])] = r['_id']

        # find unique author
        author_dict = self.authors.all_elements()

        # upload the user to the server
        users = []
        for a in author_dict:
            user = create_user()
            user['name'] = a[0]
            user['abs'] = a[1]
            user['ref'] = a[1]
            user['contact']['email'] = normalize_email(a[1])
            user['contact']['phone'] = normalize_phone(a[1])
            user['exp']['impact'] = author_impact[a][
                'citation'] if a in author_impact else 0
            # NOTE(review): this overwrites the citation count stored just
            # above with the keyword list. It looks like the keywords were
            # meant for a different field, but the target key cannot be
            # determined from this method -- confirm against the schema.
            user['exp']['impact'] = author_impact[a][
                'keyword'] if a in author_impact else []
            user['onepage']['bg'] = json.dumps([u[1] for u in author_dict[a]])
            address = self.parse_address(a[1])
            if address is not None:
                user['addr'] = address
            else:
                user['addr']['city'] = 'Unknown'
                user['addr']['country'] = 'Unknown'
            users.append(user)
            if len(users) >= 1000:
                flush(users)
                # Always start a new batch, even after a failure; otherwise
                # the rejected batch is re-sent (and keeps growing) on every
                # following author.
                users = []
        if users:
            flush(users)
        with open('pubmed_author_ids.cp', 'wb') as handle:
            pickle.dump(user_ids, handle)

        return user_ids
コード例 #3
0
ファイル: pubmed.py プロジェクト: zhangtemplar/Button-Data
    def upload_article(self, data_file: List[str]) -> dict:
        """
        Uploads the articles to server and returns the mapping of article ref to the id in the database.

        Each article is sent on its own with ``add_record('entity', article)``.
        Articles the server rejects are logged and left out of the mapping.
        The mapping is also pickled to ``pubmed_article_ids.cp``.

        :param data_file: list of names of pickled files generated from preprocess
        :return: a dict mapping article['ref'] (presumably the pubmed id) to the id in the database
        """
        article_ids = {}
        for file in data_file:
            self.logger.info('Process {}'.format(file))
            # NOTE: pickle must only be used on trusted files; the context
            # manager makes sure the handle is closed after loading.
            with open(file, 'rb') as handle:
                data = pickle.load(handle)
            for d in data:
                article = d['article']
                # fall back to the title when there is no abstract
                if len(article['abs']) < 1:
                    article['abs'] = article['name']
                article['addr']['city'] = 'unknown'
                article['addr']['country'] = 'unknown'
                response = add_record('entity', article)
                if response['_status'] != 'OK':
                    self.logger.error(
                        'fail to create article for {} due to {}'.format(
                            article['name'], response))
                    continue
                article_ids[article['ref']] = response['_items']['_id']
        with open('pubmed_article_ids.cp', 'wb') as handle:
            pickle.dump(article_ids, handle)
        return article_ids
コード例 #4
0
def upload_to_server(work_directory):
    """
    Upload the products listed in ``japan.xlsx`` and their applicants.

    The 'device' and 'drug' sheets are read row by row (the first row of each
    sheet is skipped as a header). Every row is parsed into a product and an
    applicant, both are created with ``add_record('entity', ...)`` and then
    linked by an 'Applicant' relationship (type 7). Failures are logged and
    the row is skipped.

    :param work_directory: directory that contains ``japan.xlsx``
    :return: None
    """
    book = load_workbook(os.path.join(work_directory, 'japan.xlsx'))
    log = create_logger('japan-pmda')
    log.critical(datetime.datetime.now())
    for sheet_name in ('device', 'drug'):
        # Workbook.get_sheet_by_name is deprecated in openpyxl; indexing the
        # workbook by sheet name is the supported equivalent.
        sheet = book[sheet_name]
        parse = parse_device if sheet_name == 'device' else parse_drug
        rows = iter(sheet.rows)
        next(rows, None)  # skip the header row
        for row in rows:
            cells = []
            for c in row:
                value = c.value
                if isinstance(value, str):
                    value = replace_carriage(value)
                elif value is None:
                    value = ''
                cells.append(value)
            p, a = parse(cells)
            if len(p['name']) < 1 or len(a['name']) < 1:
                log.warning('invalid record for {}'.format(p['name']))
                continue
            response = add_record('entity', [p, a])
            if response['_status'] != 'OK':
                log.error('fail to create record for {}'.format(p['name']))
                log.error(response)
                continue
            # _items follows the order of the request: [product, applicant]
            applicant_product = create_relationship(
                response['_items'][1]['_id'], response['_items'][0]['_id'])
            applicant_product['type'] = 7
            applicant_product['name'] = 'Applicant'
            applicant_product['abs'] = 'Applicant'
            response = add_record('relationship', [applicant_product])
            if response['_status'] != 'OK':
                log.error('fail to create relationship for {}'.format(
                    p['name']))
                log.error(response)
            else:
                log.debug('added {} to the system'.format(p['name']))
    log.critical(datetime.datetime.now())
コード例 #5
0
ファイル: pubmed.py プロジェクト: zhangtemplar/Button-Data
    def upload_reference(self, data_file: List[str], article_ids: dict):
        """
        Uploads the reference information for the article.

        For every article found in ``article_ids``, one relationship named
        'Reference' (type 14) is created per cited article that is also in
        ``article_ids``. References that are not strings, or that are unknown
        to the server, are skipped.

        :param data_file: list of names of pickled files generated from preprocess
        :param article_ids: a dict mapping of pubmed id of the article to the id in the database
        :return: None
        """
        for file in data_file:
            self.logger.info('Process {}'.format(file))
            # NOTE: pickle must only be used on trusted files; the context
            # manager makes sure the handle is closed after loading.
            with open(file, 'rb') as handle:
                data = pickle.load(handle)
            for d in data:
                reference = [r for r in d['reference'] if isinstance(r, str)]
                article = d['article']
                if article['ref'] not in article_ids:
                    self.logger.warning(
                        'article {} is not found in server'.format(
                            article['ref']))
                    continue
                src_id = article_ids[article['ref']]
                relationship = []
                for u in reference:
                    # The original re-tested article['ref'] here, so unknown
                    # references were never skipped and article_ids[u] raised
                    # KeyError; test (and report) the reference itself.
                    if u not in article_ids:
                        self.logger.warning(
                            'reference {} is not found in server'.format(u))
                        continue
                    r = create_relationship()
                    r['srcId'] = src_id
                    r['dstId'] = article_ids[u]
                    r['name'] = 'Reference'
                    r['type'] = 14
                    relationship.append(r)
                if not relationship:
                    # nothing to upload for this article
                    continue
                response = add_record('relationship', relationship)
                if response['_status'] != 'OK':
                    self.logger.error(
                        'fail to create reference relationship for {}'.
                        format(article['name']))
コード例 #6
0
ファイル: fda_510k.py プロジェクト: zhangtemplar/Button-Data
def _openfda_field(record, key, default=''):
    """Return ``record[key]``, else ``record['openfda'][key]``, else *default*."""
    if key in record:
        return record[key]
    # A record without an 'openfda' section falls through to the default
    # instead of raising KeyError.
    return (record.get('openfda') or {}).get(key, default)


def _http_date(value, log, field):
    """Format a date string as '%a, %d %b %Y %H:%M:%S GMT'; None if absent or unparseable."""
    if not value:
        return None
    try:
        # `parser` is assumed to be dateutil.parser, imported at file level.
        return parser.parse(value).strftime("%a, %d %b %Y %H:%M:%S GMT")
    except (TypeError, ValueError, OverflowError):
        # best effort: a bad date must not stop the upload of the record
        log.warning('ignore invalid {} {!r}'.format(field, value))
        return None


def main():
    """
    Upload the FDA records from the local JSON dump to the server.

    Every entry of ``results`` becomes a product plus its applicant company,
    both created with ``add_record('entity', ...)`` and then linked by an
    'Applicant' relationship (type 7). Each field is read from the record
    itself, falling back to its 'openfda' section. Failures are logged and
    the record is skipped.
    """
    product_code = get_product_code()
    log = create_logger('510K')
    path = os.path.expanduser('~/work/fda/device-classification-0001-of-0001.json')
    with open(path, 'r') as source:
        result = json.load(source)
    log.critical(datetime.datetime.now())
    for r in result['results']:
        p = create_product()
        p['name'] = _openfda_field(r, 'device_name')
        p['ref'] = _openfda_field(r, 'k_number')
        address_fields = (
            ('line1', 'address_1'),
            ('line2', 'address_2'),
            ('city', 'city'),
            ('state', 'state'),
            ('zip', 'zip_code'),
            ('country', 'country_code'),
        )
        for target, key in address_fields:
            p['addr'][target] = _openfda_field(r, key)
        p['intro'] = _openfda_field(r, 'statement_or_summary')
        p['asset']['type'] = 0

        committee = _openfda_field(r, 'advisory_committee_description')
        # The original read the specialty for 'lic' from the openfda section
        # only, while 'tag' also checked the record; both now use one lookup.
        specialty = _openfda_field(r, 'medical_specialty_description')
        code_key = _openfda_field(r, 'product_code')
        p['tag'] = [committee, specialty, 'FDA', 'Medical Device', '510K']
        # p['tag'] is used for tags readable to common users, p['lic'] is used for tags specified for product.
        p['asset']['lic'] = [
            'FDA',
            '510K',
            _openfda_field(r, 'clearance_type'),
            committee,
            specialty,
            code_key,
            _openfda_field(r, 'regulation_number'),
            _openfda_field(r, 'decision_description'),
        ]
        p['asset']['lic'].extend(
            third_party(_openfda_field(r, 'third_party_flag')))
        if _openfda_field(r, 'expedited_review_flag'):
            p['asset']['lic'].append('Expedited Review')
        submission_id = _openfda_field(r, 'submission_type_id')
        if submission_id not in {'1', '2'}:
            submission = submission_type(submission_id)
            if submission is not None:
                p['asset']['lic'].append(submission)
                p['tag'].append(submission)

        code = product_code.get(code_key, None)
        if code is not None:
            p['abs'] = code['device_name']
            # The original labelled flag 'N' as 'GMP Exempt', the opposite of
            # how the other flags below treat 'N'; only an explicit 'Y' is
            # reported as exempt.
            gmp = 'GMP Exempt' if code['gmp_exempt_flag'] == 'Y' else 'GMP Required'
            p['asset']['lic'].extend(['Class ' + code['device_class'], gmp])
            p['tag'].append('Class ' + code['device_class'])
            if code['implant_flag'] != 'N':
                p['asset']['lic'].append('Implant')
                p['tag'].append('Implant')
            if code['life_sustain_support_flag'] != 'N':
                p['asset']['lic'].append('Life Sustain Support')
                p['tag'].append('Life Sustain Support')
        else:
            p['abs'] = p['name']
        p['asset']['stat'] = map_status(_openfda_field(r, 'decision_code'))

        # dates are optional: keep whatever create_product() set when the
        # record has no usable value
        created = _http_date(
            _openfda_field(r, 'date_received', None), log, 'date_received')
        if created is not None:
            p['created'] = created
        updated = _http_date(
            _openfda_field(r, 'decision_date', None), log, 'decision_date')
        if updated is not None:
            p['updated'] = updated

        p['asset']['lic'] = remove_empty_string_from_array(p['asset']['lic'])
        p['tag'] = remove_empty_string_from_array(p['tag'])

        a = create_company()
        a['name'] = _openfda_field(r, 'applicant')
        a['abs'] = 'A Medical Device Company'
        a['addr'] = p['addr']
        a['tag'] = p['tag']
        a['group']['parentId'] = '000000000000000000000000'
        # contact is just the name of contact

        response = add_record('entity', [p, a])
        if response['_status'] != 'OK':
            log.error('fail to create record for {}'.format(p['name']))
            log.error(response)
            continue
        # _items follows the order of the request: [product, applicant]
        applicant_product = create_relationship(
            response['_items'][1]['_id'], response['_items'][0]['_id'])
        applicant_product['type'] = 7
        applicant_product['name'] = 'Applicant'
        applicant_product['abs'] = 'Applicant'
        response = add_record('relationship', [applicant_product])
        if response['_status'] != 'OK':
            log.error('fail to create relationship for {}'.format(p['name']))
            log.error(response)
        else:
            log.debug('added {} to the system'.format(p['name']))
    log.critical(datetime.datetime.now())