예제 #1
0
    def get_patents(self, response: Response) -> list:
        """
        Collect the patents listed on a technology page.

        Every ``field-collection-item-field-ip-info`` content block found in
        the response yields one product dictionary built by
        ``create_product``.

        :param response: response whose document contains the patent blocks
        :return: list of patent dictionaries, one per matched block (empty
            when the page lists none)
        """
        patents = []
        for row in response.xpath(
                "//div[contains(@class, 'field-collection-item-field-ip-info')]/div[@class='content']"
        ):
            title = row.xpath(
                "string(div[contains(@class, 'field-name-field-ip-title')])"
            ).get()
            tag = row.xpath(
                "string(div[contains(@class, 'field-name-field-ip-type')])"
            ).get()
            # The link is optional: a missing href is normalised to ''.
            link = row.xpath(
                "div[contains(@class, 'field-name-field-ip-number-pctwo') or contains(@class, 'field-name-field-ip-number-pat-pend')]//a/@href"
            ).get() or ''
            patent = create_product()
            patent['asset']['type'] = 1
            patent['ref'] = link
            patent['contact']['website'] = link
            patent['name'] = title
            patent['tag'] = remove_empty_string_from_array([tag])
            # Previously the record was built but never stored, so the
            # method always returned an empty list.
            patents.append(patent)
        return patents
예제 #2
0
    def parse_import_cosmetic(self, table: dict):
        """
        Build product, manufacturer and delegate records from one table.

        :param table: mapping from the source column titles to their values
        :return: dict holding the ``product``, ``company`` and ``delegate``
            records
        """
        product = create_product()
        name_cn = table['产品名称(中文)']
        name_en = table['产品名称(英文)']
        # Name prefers the first argument, abstract the other one.
        product['name'] = self.use_nonempty(name_cn, name_en)
        product['abs'] = self.use_nonempty(name_en, name_cn)
        product['intro'] = table['产品名称备注']
        product['asset']['lic'] = [table['批准文号']]

        product_address = product['addr']
        product_address['line1'] = table['生产企业地址']
        product_address['city'] = 'Unknown'
        product_address['country'] = table['生产国(地区)']

        product['tag'] = [table['产品类别'], 'Cosmetic']
        tech_columns = ('批件状态', '批准日期', '批件有效期', '备注')
        tech_details = {column: table[column] for column in tech_columns}
        product['asset']['tech'] = self._dict_to_markdown_table(tech_details)

        company = create_company()
        company['name'] = self.use_nonempty(table['生产企业(中文)'],
                                            table['生产企业(英文)'])
        company_address = company['addr']
        company_address['city'] = 'Unknown'
        company_address['line1'] = table['生产企业地址']
        company_address['country'] = table['生产国(地区)']

        delegate = create_company()
        delegate['name'] = table['在华申报责任单位']
        delegate_address = delegate['addr']
        delegate_address['city'] = 'Unknown'
        # The same column is used for both the name and the address line.
        delegate_address['line1'] = table['在华申报责任单位']
        delegate_address['country'] = 'China'

        return {'product': product, 'company': company, 'delegate': delegate}
예제 #3
0
    def parse(self, response):
        """
        Archive a technology page and export its product and patent records.

        The raw body is written to ``<name>.html`` and the extracted records
        to ``<name>.json`` inside ``self.work_directory``.

        :param response: downloaded technology page
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        name = self.parse_name_from_url(response.url)
        html_path = os.path.join(self.work_directory, name + '.html')
        with open(html_path, 'wb') as fo:
            fo.write(response.body)

        product = create_product()
        product['ref'] = response.url
        logo = response.xpath(
            "//div[contains(@class, 'field-type-image')]//img/@src").get()
        # A page without an image stores an empty logo rather than None.
        product['logo'] = '' if logo is None else logo
        product['contact']['website'] = response.url
        product['name'] = response.xpath(self.title_xpath).get()

        created = self.get_disclosure_date(response)
        product['created'] = created
        if created is None:
            # No disclosure date: the key is removed entirely.
            del product['created']

        intro = response.xpath(
            "string(//div[@class='field field-name-body field-type-text-with-summary field-label-above']/div[@class='field-items'])"
        ).get()
        product['intro'] = intro
        # Abstract is everything up to and including the first '. ' period.
        first_sentence = intro[:intro.find('. ') + 1]
        if len(first_sentence) < 1:
            first_sentence = product['name']
        product['abs'] = first_sentence

        product['asset']['type'] = 3
        product['addr'] = deepcopy(self.address)
        product['contact'] = self.get_contact(response)

        patents = self.add_patents(response)
        for patent in patents:
            # Patents share the product's address and contact objects.
            patent['addr'] = product['addr']
            patent['contact'] = product['contact']

        json_path = os.path.join(self.work_directory, name + '.json')
        with open(json_path, 'w') as fo:
            json.dump({'product': product, 'patent': patents}, fo)
예제 #4
0
    def parse(self, response):
        """
        Archive a technology page and export its product and contact records.

        The raw body is written to ``<name>.html`` and the extracted records
        to ``<name>.json`` inside ``self.work_directory``.

        :param response: downloaded technology page
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        name = self.parse_name_from_url(response.url)
        with open(os.path.join(self.work_directory, name + '.html'),
                  'wb') as fo:
            fo.write(response.body)
        product = create_product()
        product['ref'] = response.url
        product['contact']['website'] = response.url
        product['name'] = response.xpath("//h2/a/text()").get()
        content = response.xpath("//div[@class='bdp']/p/text()").getall()
        if content:
            # Abstract is the first paragraph up to its first '. ' period.
            first_paragraph = content[0]
            product['abs'] = first_paragraph[:first_paragraph.find('. ') + 1]
        product['intro'] = '\n'.join(content)
        product['asset']['type'] = 3
        product['addr'] = deepcopy(self.address)
        product['tag'] = self.add_tags(response)
        contact = self.get_contact(response)
        contact['abs'] = 'Contact of ' + product['name']
        # The address used to be overwritten with the tag list returned by a
        # second add_tags() call. The contact now shares the product's
        # address, and the tags are stored under their own key.
        contact['addr'] = product['addr']
        contact['tag'] = product['tag']
        product['contact'] = contact['contact']

        with open(os.path.join(self.work_directory, name + '.json'),
                  'w') as fo:
            json.dump({'product': product, 'contact': contact}, fo)
예제 #5
0
 def parse(self, response):
     """
     Archive a referral page and export the matching product record.

     The metadata row comes from ``self.data`` keyed by the page URL; the
     page itself only contributes the summary paragraphs and market data.

     :param response: downloaded referral page
     """
     page_url = response.url
     name = page_url.split('/')[-1]
     html_path = os.path.join(self.work_directory, name + '.html')
     with open(html_path, 'wb') as fo:
         fo.write(response.body)

     data = self.data[page_url]
     tech_columns = (
         'Reference number', 'Decision making model', 'Authorisation model', 'PRAC recommendation',
         'Procedure start date', 'PRAC recommendation date', 'CHMP opinion / CMDh position date', 'EC decision date',
         'Associated name')
     summary_paragraphs = "//div[contains(@class, 'field-name-field-ema-web-summary')]/div/p/text()"

     p = create_product()
     p['tag'] = [data['Category'] + ' Medicine', 'EU', 'Drug', data['Referral type']]
     p['abs'] = data['International non-proprietary name (INN) / common name']
     p['name'] = data['Referral name']
     asset = p['asset']
     asset['stat'] = self.status_code(data['Status of referral'])
     p['updated'] = data['Revision date']
     p['created'] = data['First published']
     p['website'] = page_url
     p['ref'] = page_url
     asset['tech'] = dictionary_to_markdown({column: data[column] for column in tech_columns})
     # The licence list is the very same list object as the tags.
     asset['lic'] = p['tag']
     asset['type'] = 1
     p['intro'] = '\n'.join(response.xpath(summary_paragraphs).getall())
     asset['market'] = dictionary_to_markdown(self.extract_market(response))
     address = p['addr']
     address['city'] = 'Unknown'
     address['country'] = 'EU'

     json_path = os.path.join(self.work_directory, name + '.json')
     with open(json_path, 'w') as fo:
         json.dump(p, fo)
예제 #6
0
    def get_publications(self, response: Response) -> list:
        """
        Collect the publications listed on a technology page.

        Every ``field-collection-item-field-publications`` content block
        found in the response yields one product dictionary built by
        ``create_product``.

        :param response: response whose document contains the publications
        :return: list of publication dictionaries, one per matched block
            (empty when the page lists none)
        """
        publications = []
        for row in response.xpath(
                "//div[contains(@class, 'field-collection-item-field-publications')]/div[@class='content']"
        ):
            title = row.xpath(
                "string(div[contains(@class, 'field-name-field-link')])").get()
            other = row.xpath(
                "string(div[contains(@class, 'field-name-field-date-and-other-info')])"
            ).get()
            # The link is optional: a missing href is normalised to '',
            # matching how get_patents treats its optional link.
            link = row.xpath(
                "div[contains(@class, 'field-name-field-link')]//a/@href"
            ).get() or ''
            vendor = row.xpath(
                "string(div[contains(@class, 'field-name-field-publication')])"
            ).get()
            publication = create_product()
            publication['asset']['type'] = 8
            publication['ref'] = link
            publication['contact']['website'] = link
            publication['name'] = title
            publication['abs'] = other
            publication['tag'] = remove_empty_string_from_array([vendor])
            # Previously the record was built but never stored, so the
            # method always returned an empty list.
            publications.append(publication)
        return publications
예제 #7
0
    def parse(self, response):
        """
        Archive a technology page and export its product and inventors.

        The raw body is written to ``<name>.html`` and the extracted records
        to ``<name>.json`` inside ``self.work_directory``.

        :param response: downloaded technology page
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        name = self.parse_name_from_url(response.url)
        html_path = os.path.join(self.work_directory, name + '.html')
        with open(html_path, 'wb') as fo:
            fo.write(response.body)

        product = create_product()
        product['ref'] = response.url
        product['contact']['website'] = response.url
        product['name'] = response.xpath("string(//h1[@class='title'])").get()

        meta = self.get_meta(response)
        market = extract_dictionary(meta, 'Applications')
        product['asset']['market'] = '\n'.join(market.values())
        # Sections used for the market text are left out of the intro.
        for key in market:
            del meta[key]

        intro = dictionary_to_markdown(meta)
        product['intro'] = intro
        # Abstract is the intro up to its first '. ' period, or all of it.
        summary = intro[:intro.find('. ') + 1]
        product['abs'] = intro if len(summary) < 1 else summary

        product['asset']['type'] = 3
        product['addr'] = deepcopy(self.address)

        inventors = self.add_inventors(response)
        for user in inventors:
            user['abs'] = 'Inventor of ' + product['name']
            user['addr'] = product['addr']
            user['tag'] = product['tag']

        product['contact'] = self.get_contact(response)

        json_path = os.path.join(self.work_directory, name + '.json')
        with open(json_path, 'w') as fo:
            json.dump({'product': product, 'inventors': inventors}, fo)
예제 #8
0
    def parse_domestic_device(self, table: dict):
        """
        Build product, registrant and agent records from one table.

        :param table: mapping from the source column titles to their values;
            note that two of the titles read here end with a tab character
        :return: dict holding the ``product``, ``company`` and ``delegate``
            records
        """
        market_columns = ('型号、规格', '其他内容', '备注', '批准日期', '有效期至', '变更日期', '变更情况',
                          '审批部门')
        tech_columns = ('主要组成成分(体外诊断试剂)', '预期用途(体外诊断试剂)', '产品储存条件及有效期(体外诊断试剂)')

        product = create_product()
        product['name'] = table['产品名称	']
        product['abs'] = table['适用范围	']
        product['intro'] = table['结构及组成']
        asset = product['asset']
        asset['lic'] = [table['注册证编号'], table['产品标准']]

        product_address = product['addr']
        product_address['line1'] = table['生产地址']
        product_address['city'] = 'Unknown'
        product_address['country'] = '中国'
        product_address['zip'] = table['邮编']

        product['tag'] = [table['剂型(中文)'], table['产品类别'], 'Medical Device']
        asset['market'] = self._dict_to_markdown_table(
            {column: table[column] for column in market_columns})
        asset['tech'] = self._dict_to_markdown_table(
            {column: table[column] for column in tech_columns})

        company = create_company()
        company['name'] = table['注册人名称']
        company_address = company['addr']
        company_address['city'] = 'Unknown'
        company_address['line1'] = table['注册人住所']
        company_address['country'] = 'China'

        delegate = create_company()
        delegate['name'] = table['代理人名称']
        delegate_address = delegate['addr']
        delegate_address['city'] = 'Unknown'
        delegate_address['line1'] = table['代理人住所']
        delegate_address['country'] = 'China'

        return {'product': product, 'company': company, 'delegate': delegate}
예제 #9
0
    def parse(self, response):
        """
        Archive a technology page and export its product and inventors.

        The raw body is written to ``<name>.html`` and the extracted records
        to ``<name>.json`` inside ``self.work_directory``, where ``<name>``
        is the last path segment of the page URL.

        :param response: downloaded technology page
        """
        page_url = response.url
        self.log('Parse technology {}'.format(page_url),
                 level=logging.INFO)
        name = page_url.split('/')[-1]
        html_path = os.path.join(self.work_directory, name + '.html')
        with open(html_path, 'wb') as fo:
            fo.write(response.body)

        product = create_product()
        product['ref'] = page_url
        keywords = self.add_keywords(response)
        product['tag'] = remove_empty_string_from_array(keywords)
        product['asset']['type'] = 3
        product['addr'] = deepcopy(self.address)
        product['name'] = response.xpath("string(//h1)").get()

        meta = self.get_meta(response)
        contents = meta['abstract'].split('\n')
        # First line of the abstract is the summary; the rest is the intro.
        if contents and contents[0]:
            product['abs'] = contents[0]
        else:
            product['abs'] = name
        product['intro'] = '\n'.join(contents[1:])
        del meta['abstract']
        product['asset']['market'] = dictionary_to_markdown(meta)

        # get_contact returns a pair; only the second element is used here.
        _manager, contact = self.get_contact(response)
        product['contact'] = contact
        product['contact']['website'] = page_url

        inventors = self.add_inventors(response)
        for user in inventors:
            user['abs'] = 'Inventor of ' + product['name']
            user['addr'] = product['addr']
            user['tag'] = product['tag']

        json_path = os.path.join(self.work_directory, name + '.json')
        with open(json_path, 'w') as fo:
            json.dump({'product': product, 'inventors': inventors}, fo)
0
def _format_device_date(value):
    """Return *value* in the record date format, or ``None`` if it cannot be parsed."""
    date_format = "%a, %d %b %Y %H:%M:%S GMT"
    if isinstance(value, datetime.datetime):
        return value.strftime(date_format)
    try:
        return parser.parse(value).strftime(date_format)
    except (TypeError, ValueError, OverflowError):
        # Unparseable or missing cell: the caller simply skips the field.
        return None


def parse_device(cells):
    """
    Convert one row of medical-device cells into product and company records.

    Cells are read by position: 0 company name, 1 update date, 3 creation
    date, 4 product name, 5 and 6 tags, 9 market text, 10 review category.

    :param cells: indexable row of cell values; date cells may be
        ``datetime.datetime`` objects or text
    :return: tuple ``(product, company)``
    """
    review_category = {
        '1':
        'Ophthalmology and otorhinolaryngology',
        '2':
        'dentistry',
        '3':
        'cerebral, cardiovascular, respiratory, psychiatric, and neurological field',
        '3-1':
        'Intervention devices mainly in cerebral, cardiovascular, respiratory, psychiatric, and neurological field',
        '3-2':
        'Non-intervention devices mainly in cerebral, cardiovascular, respiratory, psychiatric, and neurological field',
        '4':
        'cerebral, cardiovascular, respiratory, psychiatric, and neurological field',
        '5':
        'gastrointestinal and urinary systems, obstetrics and gynecology',
        '6':
        'orthopedic/plastic surgery and dermatology',
        '7':
        'laboratory tests, in vitro diagnostics',
        '8':
        'multicategory medical devices, advanced electronic medical devices, and other uncategorized medical devices',
    }
    p = create_product()
    p['name'] = cells[4]
    # Dates are optional: a cell that cannot be parsed leaves the field as
    # create_product() initialised it.
    created = _format_device_date(cells[3])
    if created is not None:
        p['created'] = created
    updated = _format_device_date(cells[1])
    if updated is not None:
        p['updated'] = updated
    p['tag'] = remove_empty_string_from_array(
        [cells[5], cells[6], 'Japan PMDA', 'Medical Device'])
    p['asset']['lic'] = p['tag']
    p['asset']['stat'] = 2
    # Unknown category codes are kept verbatim as the abstract.
    p['abs'] = review_category.get(cells[10], cells[10])
    if len(p['abs']) < 1:
        p['abs'] = p['name']
    p['asset']['market'] = cells[9]
    p['addr']['country'] = 'Japan'
    p['addr']['city'] = 'Unknown'

    a = create_company()
    a['name'] = cells[0]
    a['abs'] = 'A Medical Device Company'
    # The company shares the product's address and tag objects.
    a['addr'] = p['addr']
    a['tag'] = p['tag']
    return p, a
예제 #11
0
 def process_file(self, file: str) -> dict:
     """
     Convert one clinical-study XML file into product, sponsor and user records.

     :param file: path of the XML file; its root element must be
         ``clinical_study``
     :return: dict holding the ``product`` record and the ``sponsors`` and
         ``users`` lists, each entry referencing the study's ``nct_id``
     """
     date_format = "%a, %d %b %Y %H:%M:%S GMT"
     # Close the file deterministically instead of leaking the handle.
     with open(file, "rb") as source:
         trial = xmltodict.parse(source)['clinical_study']
     # for principal investigator
     sponsors = self._add_sponsors(trial)
     # for user
     users = self._add_users(trial)
     # for clinical trial itself
     product = create_product()
     product['ref'] = trial['id_info']['nct_id']
     product['name'] = trial['brief_title']
     product['asset']['type'] = 2
     del trial['brief_title']
     if 'brief_summary' in trial:
         product['abs'] = trial['brief_summary']['textblock']
         del trial['brief_summary']
     else:
         product['abs'] = product['name']
     if 'overall_contact' in trial:
         overall_contact = trial['overall_contact']
         if 'phone' in overall_contact:
             product['contact']['phone'] = overall_contact['phone']
         # An empty <phone_ext/> element parses to None, so test truthiness
         # rather than calling len() on it.
         extension = overall_contact.get('phone_ext')
         if extension:
             product['contact']['phone'] += '-' + extension
         if 'email' in overall_contact:
             product['contact']['email'] = overall_contact['email']
         del trial['overall_contact']
     # type
     product['tag'] = self._find_type(trial)
     product['asset']['ind'] = self._find_indication(trial)
     # Dates are optional: a missing or unparseable value leaves the field
     # as create_product() initialised it.
     try:
         product['created'] = parser.parse(
             trial['study_first_submitted']).strftime(date_format)
     except (KeyError, TypeError, ValueError, OverflowError):
         pass
     try:
         product['updated'] = parser.parse(
             trial['last_update_submitted']).strftime(date_format)
     except (KeyError, TypeError, ValueError, OverflowError):
         pass
     # stage
     product['asset']['stat'] = self._get_status(trial)
     # for other details: everything still left in the trial is rendered
     # as a table
     del trial['overall_status']
     product['asset']['tech'] = format_html_table(trial)
     for s in sponsors:
         s['ref'] = product['ref']
     for u in users:
         u['ref'] = product['ref']
     return {'product': product, 'sponsors': sponsors, 'users': users}
예제 #12
0
    def parse(self, response):
        """
        Archive a medicine page and export its product and applicant records.

        The metadata row comes from ``self.data`` keyed by the page URL; the
        page itself contributes the summary paragraphs, the market data and
        the applicant link.

        :param response: downloaded medicine page
        """
        page_url = response.url
        name = page_url.split('/')[-1]
        html_path = os.path.join(self.work_directory, name + '.html')
        with open(html_path, 'wb') as fo:
            fo.write(response.body)

        data = self.data[page_url]
        p = create_product()
        p['name'] = data['Medicine name']
        p['abs'] = data[
            'International non-proprietary name (INN) / common name']

        p['tag'] = [data['Category'] + ' Medicine', 'EU', 'Drug']
        tags = p['tag']
        therapeutic_area = data['Therapeutic area']
        if therapeutic_area is not None:
            tags.extend(therapeutic_area.split(", "))
        p['website'] = page_url
        p['ref'] = page_url
        if data['Patient safety'] == 'no':
            tags.append('Patient Risk')
        if data['Orphan medicine'] == 'yes':
            tags.append('Orphan medicine')
        species = data['Species']
        if species is not None and len(species) > 0:
            tags.append('Species')

        p['updated'] = data['Revision date']
        p['created'] = data['First published']
        asset = p['asset']
        # The licence list is the very same list object as the tags.
        asset['lic'] = p['tag']
        asset['stat'] = 3
        asset['type'] = 1
        summary = response.xpath(
            "//div[contains(@class, 'views-field-field-ema-web-summary')]/div/p/text()"
        ).getall()
        p['intro'] = '\n'.join(summary)
        asset['market'] = dictionary_to_markdown(
            self.extract_market(response))
        tech_columns = ('Active substance', 'Type of withdrawal',
                        'Date of withdrawal')
        asset['tech'] = dictionary_to_markdown(
            {column: data[column] for column in tech_columns})
        p['addr']['city'] = 'Unknown'
        p['addr']['country'] = 'EU'

        a = create_company()
        a['name'] = data['Marketing authorisation holder/company name']
        a['abs'] = 'A Medicine Company'
        a['addr']['city'] = 'Unknown'
        a['addr']['country'] = 'EU'
        a['entr']['bp'] = response.xpath(
            "//div[contains(@class, 'views-field-view-2')]/span/li/a/@href"
        ).get()

        json_path = os.path.join(self.work_directory, name + '.json')
        with open(json_path, 'w') as fo:
            json.dump({'product': p, 'applicant': a}, fo)
예제 #13
0
    def parse(self, response):
        """
        Archive a technology page and export everything extracted from it.

        The raw body is written to ``<name>.html`` and the product,
        inventors, patents and publications to ``<name>.json`` inside
        ``self.work_directory``, where ``<name>`` is the last path segment
        of the page URL.

        :param response: downloaded technology page
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        name = response.url.split('/')[-1]
        with open(os.path.join(self.work_directory, name + '.html'),
                  'wb') as fo:
            fo.write(response.body)
        product = create_product()
        product['name'] = response.xpath("//h1[@id='page-title']/text()").get()
        product['ref'] = response.url
        product['contact']['website'] = response.url
        product['addr'] = deepcopy(self.address)
        product['asset']['type'] = 3
        description = self.get_description(response)
        abstract = extract_dictionary(description, 'Applications')
        product['abs'] = '\n'.join(abstract.values())
        if len(product['abs']) < 1:
            # Fall back to the first description section. The default keeps
            # an empty description from raising StopIteration.
            product['abs'] = next(iter(description.values()), '')
        if len(product['abs']) < 1:
            product['abs'] = product['name']
        market = extract_dictionary(description, 'Advantages')
        product['asset']['market'] = '\n'.join(market.values())
        tech = extract_dictionary(description, 'Technology')
        product['asset']['tech'] = '\n'.join(tech.values())
        # Sections already used above are left out of the intro. The
        # membership test protects against a section matched by more than
        # one filter, which would otherwise raise KeyError.
        for used in (abstract, market, tech):
            for k in used:
                if k in description:
                    del description[k]
        product['intro'] = dictionary_to_markdown(description)
        product['tag'] = self.add_keywords(response)
        product['contact'] = self.get_contact(response)

        inventors = self.add_inventors(response)
        for user in inventors:
            if len(user['abs']) < 1:
                user['abs'] = 'Inventor of ' + product['name']
            user['addr'] = product['addr']

        patents = self.get_patents(response)
        publications = self.get_publications(response)
        # The extension used to be appended without its dot ('<name>json').
        with open(os.path.join(self.work_directory, name + '.json'),
                  'w') as fo:
            json.dump(
                {
                    'product': product,
                    'inventors': inventors,
                    'patents': patents,
                    'publications': publications
                }, fo)
예제 #14
0
    def parse(self, response):
        """
        Archive a technology page and export product, inventors and contact.

        The raw body is written to ``<name>.html`` and the extracted records
        to ``<name>.json`` inside ``self.work_directory``.

        :param response: downloaded technology page
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        name = self.parse_name_from_url(response.url)
        html_path = os.path.join(self.work_directory, name + '.html')
        with open(html_path, 'wb') as fo:
            fo.write(response.body)

        product = create_product()
        product['ref'] = response.url
        product['contact']['website'] = response.url
        product['name'] = response.xpath(self.title_xpath).get()

        meta = self.get_meta(response)
        abstract = extract_dictionary(meta, self.abstract_filter)
        product['abs'] = '\n'.join(abstract.values())
        market = extract_dictionary(meta, self.market_filter)
        product['asset']['market'] = '\n'.join(market.values())
        tech = extract_dictionary(meta, self.tech_filter)
        product['asset']['tech'] = '\n'.join(tech.values())
        # Whatever was used above is left out of the intro.
        for consumed in (market, tech, abstract):
            for key in consumed:
                if key in meta:
                    del meta[key]
        product['intro'] = dictionary_to_markdown(meta)
        product['asset']['type'] = 3
        product['addr'] = deepcopy(self.address)
        product['tag'] = self.add_tags(response)

        inventor_label = 'Inventor of ' + product['name']
        inventors = self.add_inventors(response)
        for user in inventors:
            user['abs'] = 'Inventor of ' + product['name']
            user['addr'] = product['addr']
            user['tag'] = product['tag']

        contact = self.get_contact(response)
        contact['abs'] = inventor_label
        contact['addr'] = product['addr']
        contact['tag'] = product['tag']
        product['contact'] = contact['contact']

        records = {
            'product': product,
            'inventors': inventors,
            'contact': contact
        }
        json_path = os.path.join(self.work_directory, name + '.json')
        with open(json_path, 'w') as fo:
            json.dump(records, fo)
예제 #15
0
 def patent_callback(data: List[str], result: dict) -> None:
     """
     Turn one row of patent fields into a product stored in *result*.

     Rows whose length differs from ``self.PATENT_HEADER`` are logged and
     skipped.

     :param data: field values, in the order of ``self.PATENT_HEADER``
     :param result: mapping updated in place, keyed by the patent number
     """
     header = self.PATENT_HEADER
     if len(data) != len(header):
         self.logger.error('fail to parse {}'.format(data))
         return
     fields = dict(zip(header, data))
     patent = create_product()
     tags = patent['tag']
     tags.append(fields['type'])
     patent['ref'] = fields['number']
     patent['addr']['country'] = fields['country']
     patent['updated'] = format_datetime(fields['date'])
     patent['abs'] = fields['abstract']
     patent['name'] = fields['title']
     tags.append(fields['kind'])
     patent['tag'] = remove_empty_string_from_array(tags)
     result[patent['ref']] = patent
예제 #16
0
    def parse_domestic_drug(self, table: dict):
        """
        Build the product record from one table.

        :param table: mapping from the source column titles to their values
        :return: dict with the single key ``product``
        """
        product = create_product()
        english_name = table['英文名称']
        product['name'] = self.use_nonempty(table['产品名称'], english_name)
        product['abs'] = table['商品名']
        product['intro'] = table['英文名称']

        asset = product['asset']
        asset['lic'] = [
            table['批准文号'], table['药品本位码'], table['原批准文号']
        ]

        address = product['addr']
        address['line1'] = table['生产地址']
        address['city'] = 'Unknown'
        address['country'] = '中国'

        product['tag'] = [table['剂型'], table['产品类别'], 'Drug']
        tech_columns = ('规格', '批准日期', '药品本位码备注')
        asset['tech'] = self._dict_to_markdown_table(
            {column: table[column] for column in tech_columns})

        return {'product': product}
예제 #17
0
def parse_drug(cells):
    """
    Convert one row of drug cells into product and company records.

    Cells are read by position: 0 company name (also stored as the product's
    ``tech`` text), 2 creation date, 3 product name, 5 abstract, 6 review
    category.

    :param cells: indexable row of cell values; the date cell may be a
        ``datetime`` object or text
    :return: tuple ``(product, company)``
    """
    review_category = {
        '1':
        'Gastrointestinal drugs, dermatologic drugs, immunosuppressive drugs, and others (not classified as other categories)',
        '2':
        "Cardiovascular drugs, antiparkinsonian drugs, anti-Alzheimer's drugs",
        '3-1':
        'Central/peripheral nervous system drugs (excluding anesthetic drugs)',
        '3-2':
        'Anesthetic drugs, sensory organ drugs (excluding drugs for inflammatory diseases), narcotics',
        '4':
        'Antibacterial drugs, antiviral drugs (excluding AIDS drugs), antifungal drugs, antiprotozoal drugs, anthelmintic drugs',
        '5':
        'Reproductive system drugs, drugs for urogenital system, combination drugs',
        '6-1':
        'Respiratory tract drugs, anti-allergy drugs (excluding dermatologic drugs), sensory organ drugs (drugs for inflammatory diseases)',
        '6-2':
        'Hormone drugs, drugs for metabolic disorders (including diabetes mellitus, osteoporosis, gout, and inborn errors of metabolism)',
    }
    # Store the date as formatted text rather than a datetime object, which
    # the json module cannot serialise.
    date_format = "%a, %d %b %Y %H:%M:%S GMT"
    p = create_product()
    p['name'] = cells[3]
    if isinstance(cells[2], datetime):
        p['created'] = cells[2].strftime(date_format)
    else:
        try:
            p['created'] = parser.parse(cells[2]).strftime(date_format)
        except (TypeError, ValueError, OverflowError):
            # Missing or unparseable date: keep create_product()'s default.
            pass
    # Unknown category codes are kept verbatim as a tag.
    category = review_category.get(cells[6], cells[6])
    p['tag'] = remove_empty_string_from_array([category, 'Japan PMDA', 'Drug'])
    p['asset']['lic'] = p['tag']
    p['asset']['stat'] = 2
    p['asset']['tech'] = cells[0]
    p['abs'] = cells[5]
    if len(p['abs']) < 1:
        p['abs'] = p['name']
    p['addr']['country'] = 'Japan'
    p['addr']['city'] = 'Unknown'

    a = create_company()
    a['name'] = cells[0]
    a['abs'] = 'A Drug Company'
    # The company shares the product's address and tag objects.
    a['addr'] = p['addr']
    a['tag'] = p['tag']
    return p, a
예제 #18
0
    def add_patents(self, response: Response) -> list:
        """
        Build one patent record per link in the page's patent field.

        :param response: Response object
        :return: a list of patent dictionaries (empty when no link matches)
        """
        anchors = response.xpath(
            "//div[@class='field field-name-field-patent field-type-link-field field-label-above']/div[@class='field-items']//a"
        )
        patents = []
        for anchor in anchors:
            entry = create_product()
            title = anchor.xpath("text()").get()
            entry['name'] = title
            href = anchor.xpath("@href").get()
            entry['ref'] = href
            entry['contact']['website'] = href
            entry['asset']['type'] = 1
            # The link text doubles as the abstract.
            entry['abs'] = title
            patents.append(entry)
        return patents
예제 #19
0
    def parse(self, response):
        """
        Archive a technology page and export everything extracted from it.

        The raw body is written under the last path segment of the page URL
        and the product, contact person, inventors and patents to a ``.json``
        file with the same stem, both inside ``self.work_directory``.

        :param response: downloaded technology page
        """
        self.log('Parse technology {}'.format(response.url), level=logging.INFO)
        name = response.url.split('/')[-1]
        with open(os.path.join(self.work_directory, name), 'wb') as fo:
            fo.write(response.body)
        product = create_product()
        product['name'] = response.xpath("//h1[@class='tech-heading tech-heading-main']/text()").get()
        product['ref'] = response.url
        product['contact']['website'] = response.url
        product['addr'] = deepcopy(self.address)
        product['asset']['type'] = 3
        description = self.get_description(response)
        abstract = extract_dictionary(description, 'brief|Brief|BRIEF')
        product['abs'] = '\n'.join(abstract.values())
        if len(product['abs']) < 1:
            # Fall back to the first description section. The default keeps
            # an empty description from raising StopIteration.
            product['abs'] = next(iter(description.values()), '')
        if len(product['abs']) < 1:
            product['abs'] = product['name']
        introduction = extract_dictionary(description, 'full|Full|FULL')
        product['intro'] = '\n'.join(introduction.values())
        # Sections already used above are left out of the market text. The
        # membership test protects against a section matched by both filters,
        # which would otherwise raise KeyError.
        for used in (abstract, introduction):
            for k in used:
                if k in description:
                    del description[k]
        product['asset']['market'] = dictionary_to_markdown(description)
        product['contact'] = self.get_contact(response)
        product['tag'] = self.add_keywords(response)

        contact_person = self.get_contact_person(response)
        contact_person['abs'] = 'Person of Contact for ' + product['name']
        contact_person['addr'] = product['addr']
        contact_person['contact'] = product['contact']
        contact_person['tag'] = product['tag']
        inventors = self.add_inventors(response)
        for user in inventors:
            user['abs'] = 'Inventor of ' + product['name']
            user['addr'] = product['addr']
            user['tag'] = product['tag']

        patents = self.get_patents(response)
        # Swap the extension instead of chopping four characters. The result
        # is identical for '*.html' names and no longer produces a mangled
        # file name for anything else.
        json_name = os.path.splitext(name)[0] + '.json'
        with open(os.path.join(self.work_directory, json_name), 'w') as fo:
            json.dump({'product': product, 'contact': contact_person, 'inventors': inventors, 'patents': patents}, fo)
예제 #20
0
    def parse_import_drug(self, table: dict):
        """
        Build product, company and manufacturer records from one table.

        :param table: mapping from the source column titles to their values
        :return: dict holding the ``product``, ``company`` and
            ``manufacture`` records
        """
        pick = self.use_nonempty
        tech_columns = ('产品名称(英文)', '包装规格(中文)', '规格(中文)', '发证日期', '有效期截止日',
                        '药品本位码备注')

        product = create_product()
        product['name'] = pick(table['商品名(中文)'], table['商品名(英文)'])
        product['abs'] = pick(table['产品名称(中文)'], table['产品名称(英文)'])
        product['intro'] = table['商品名(英文)']
        asset = product['asset']
        asset['lic'] = [
            table['注册证号'], table['原注册证号'], table['分包装批准文号'], table['药品本位码']
        ]
        product_address = product['addr']
        product_address['line1'] = table['生产地址']
        product_address['city'] = 'Unknown'
        product_address['country'] = '中国'
        product['tag'] = [table['剂型(中文)'], table['产品类别'], 'Drug']
        asset['tech'] = self._dict_to_markdown_table(
            {column: table[column] for column in tech_columns})

        # For both companies the first argument to use_nonempty is the
        # "(英文)" column and the second the "(中文)" one.
        company = create_company()
        company['name'] = pick(table['公司名称(英文)'], table['公司名称(中文)'])
        company_address = company['addr']
        company_address['city'] = 'Unknown'
        company_address['line1'] = pick(table['地址(英文)'], table['地址(中文)'])
        company_address['country'] = pick(table['国家/地区(英文)'],
                                          table['国家/地区(中文)'])

        manufacture = create_company()
        manufacture['name'] = pick(table['生产厂商(英文)'], table['生产厂商(中文)'])
        manufacture_address = manufacture['addr']
        manufacture_address['city'] = 'Unknown'
        manufacture_address['line1'] = pick(table['厂商地址(英文)'],
                                            table['厂商地址(中文)'])
        manufacture_address['country'] = pick(table['厂商国家/地区(英文)'],
                                              table['厂商国家/地区(中文)'])

        return {
            'product': product,
            'company': company,
            'manufacture': manufacture
        }
예제 #21
0
    def get_patents(self, response: Response) -> list:
        """
        Collect the patents listed in the page's ``patentRow`` table rows.

        Columns are read by position within each row: 2 status text, 3 link
        and title, 4 date.

        :param response: response whose document contains the patent rows
        :return: list of patent dictionaries, one per row (empty when the
            page lists none)
        """
        patents = []
        for row in response.xpath("//tr[@class='patentRow']"):
            patent = create_product()
            patent['asset']['type'] = 1
            # XPaths are anchored with '.' so they read this row. A leading
            # '//' searches the whole document and would return the first
            # row's cells every time.
            status_text = (row.xpath(".//td[2]/text()").get() or '').strip()
            # Stored under 'stat', the asset key every sibling parser uses.
            # NOTE(review): confirm against create_product().
            patent['asset']['stat'] = 1 if re.match(
                'issued|approved', status_text, re.IGNORECASE) is None else 2
            patent['ref'] = row.xpath(".//td[3]/a/@href").get()
            patent['name'] = row.xpath(".//td[3]/a/text()").get()
            try:
                # row.xpath was subscripted instead of called, so the date
                # was never set.
                patent['created'] = parse(
                    row.xpath("string(.//td[4])").get()
                ).strftime("%a, %d %b %Y %H:%M:%S GMT")
            except (TypeError, ValueError, OverflowError):
                # Missing or unparseable date: keep create_product()'s default.
                pass
            # Previously the record was built but never stored.
            patents.append(patent)
        return patents
예제 #22
0
    def parse_domestic_cosmetic(self, table: dict):
        """
        Build the product and manufacturer records of one domestic cosmetic.

        :param table: mapping of field labels to their values
        :return dict holding the 'product' and 'manufacture' records
        """
        item = create_product()
        item['name'] = table['产品名称']
        item['abs'] = table['产品名称备注']

        licences = [table['批准文号'], table['卫生许可证号']]
        item['asset']['lic'] = licences

        site = table['生产企业地址']
        item_address = item['addr']
        item_address['line1'] = site
        item_address['city'] = 'Unknown'
        item_address['country'] = '中国'

        item['tag'] = [table['产品类别'], 'Cosmetic']

        # Approval details are rendered as a markdown table.
        approval = {}
        for label in ('批件状态', '批准日期', '批件有效期', '备注'):
            approval[label] = table[label]
        item['asset']['tech'] = self._dict_to_markdown_table(approval)

        maker = create_company()
        # The lookup key ends with a tab character.
        maker['name'] = table['生产企业	']
        maker_address = maker['addr']
        maker_address['city'] = 'Unknown'
        maker_address['line1'] = table['生产企业地址']
        maker_address['country'] = 'China'

        return {'product': item, 'manufacture': maker}
예제 #23
0
    def parse(self, response):
        """
        Store the raw page and the records extracted from it.

        The response body is written to ``<name>.html`` and the product and
        inventor records to ``<name>.json`` inside ``self.work_directory``,
        where ``name`` is the last path segment of the response URL.

        :param response: response of a technology page
        """
        self.log('Parse technology {}'.format(response.url), level=logging.INFO)
        name = response.url.split('/')[-1]
        with open(os.path.join(self.work_directory, name + '.html'), 'wb') as fo:
            fo.write(response.body)
        product = create_product()
        product['ref'] = response.url
        # NOTE(review): 'contact' is replaced wholesale by get_contact() further
        # down, so this value only survives if get_contact() sets it again.
        product['contact']['website'] = response.url
        meta = self.get_meta(response)
        product['name'] = meta['Project Title']
        try:
            product['created'] = parse(meta['Posted Date']).strftime("%a, %d %b %Y %H:%M:%S GMT")
        except (KeyError, TypeError, ValueError, OverflowError):
            # Best effort: a missing or unparseable date leaves 'created' as is.
            pass
        product['tag'] = meta['Tags']
        if meta['banner']:
            product['logo'] = meta['banner'][0]
        product['asset']['type'] = 3
        abstract = extract_dictionary(meta, 'brief|Brief|BRIEF|Short')
        product['abs'] = '\n'.join(abstract.values())
        # Fall back to the first meta value, then to the name, when empty.
        if len(product['abs']) < 1:
            product['abs'] = next(iter(meta.values()))
        if len(product['abs']) < 1:
            product['abs'] = product['name']
        introduction = extract_dictionary(meta, 'abstract|Abstract')
        product['intro'] = '\n'.join(introduction.values())
        # Remove the entries already used before the rest of meta is rendered
        # into 'market'.
        for k in abstract:
            del meta[k]
        for k in introduction:
            del meta[k]
        product['asset']['market'] = dictionary_to_markdown(meta)
        product['contact'] = self.get_contact(response)
        product['addr'] = deepcopy(self.get_address(response))
        inventors = self.add_inventors(response)
        for user in inventors:
            user['abs'] = 'Inventor of ' + product['name']
            user['addr'] = product['addr']
            user['tag'] = product['tag']

        with open(os.path.join(self.work_directory, name + '.json'), 'w') as fo:
            json.dump({'product': product, 'inventors': inventors}, fo)
예제 #24
0
    def parse(self, response):
        """
        Store the raw page and the records extracted from it.

        The response body goes to ``<name>.html`` and the product and inventor
        records to ``<name>.json`` inside ``self.work_directory``, where
        ``name`` is the second-to-last '/'-separated piece of the response URL.

        :param response: response of a technology page
        """
        url = response.url
        self.log('Parse technology {}'.format(url), level=logging.INFO)
        name = url.split('/')[-2]
        html_path = os.path.join(self.work_directory, name + '.html')
        with open(html_path, 'wb') as fo:
            fo.write(response.body)

        product = create_product()
        product['ref'] = url
        product['contact']['website'] = url
        product['name'] = response.xpath("string(//h1)").get()
        meta = self.get_meta(response)

        # Abstract: matching meta entries, else the first meta value, else
        # the product name.
        abstract = extract_dictionary(meta, 'Advantage|advantage|Abstract')
        summary = '\n'.join(abstract.values())
        if len(summary) < 1:
            summary = next(iter(meta.values()))
        if len(summary) < 1:
            summary = product['name']
        product['abs'] = summary

        asset = product['asset']
        asset['tech'] = dictionary_to_markdown(meta, ('Technology', ))
        asset['market'] = dictionary_to_markdown(
            meta, ('Value Proposition', 'Value proposition'))

        # Whatever is left after removing the entries used above becomes
        # the introduction.
        for used in abstract:
            del meta[used]
        for used in ('Value Proposition', 'Value proposition', 'Technology'):
            meta.pop(used, None)
        product['intro'] = dictionary_to_markdown(meta)

        asset['type'] = 3
        product['addr'] = deepcopy(self.address)
        inventors = self.add_inventors(response)
        product['tag'] = self.add_tags(response)
        for person in inventors:
            person['abs'] = 'Inventor of ' + product['name']
            person['addr'] = product['addr']
            person['tag'] = product['tag']
        product['contact'] = self.get_contact(response)

        json_path = os.path.join(self.work_directory, name + '.json')
        with open(json_path, 'w') as fo:
            json.dump({'product': product, 'inventors': inventors}, fo)
예제 #25
0
 def parse(self, response):
     """
     Store the raw page and the herbal product record built for it.

     The response body is written to ``<name>.html`` and the product record
     to ``<name>.json`` inside ``self.work_directory``, where ``name`` is the
     last path segment of the response URL. Field values come from
     ``self.data[response.url]``.

     :param response: response of a herbal substance page
     """
     name = response.url.split('/')[-1]
     with open(os.path.join(self.work_directory, name + '.html'),
               'wb') as fo:
         fo.write(response.body)
     data = self.data[response.url]
     p = create_product()
     p['name'] = data['English common name of herbal substance']
     p['abs'] = data['Botanical name of plant']
     p['tag'] = ["EU", 'Drug', 'Herbal']
     if data['Use'] is not None:
         p['tag'].extend(data['Use'].split(', '))
     # The page URL belongs under 'contact', as in the other parsers of this
     # file; a top-level 'website' key is not used anywhere else.
     p['contact']['website'] = response.url
     p['ref'] = response.url
     if data['Combination'] == 'yes':
         p['tag'].append('Combination')
     p['asset']['stat'] = self.status_code(data['Status'])
     p['asset']['tech'] = dictionary_to_markdown({
         key: data[key]
         for key in ('Latin name of herbal substance', 'Outcome',
                     'Date added to the inventory',
                     'Date added to the priority list', 'First published',
                     'Revision date')
     })
     p['updated'] = data['Revision date']
     p['created'] = data['First published']
     # 'lic' refers to the same list object as 'tag'.
     p['asset']['lic'] = p['tag']
     p['asset']['type'] = 1
     p['intro'] = '\n'.join(
         response.xpath(
             "//div[contains(@class, 'field-name-field-ema-web-summary')]/div/div/p/text()"
         ).getall())
     p['asset']['market'] = dictionary_to_markdown(
         self.extract_market(response))
     p['addr']['city'] = 'Unknown'
     p['addr']['country'] = 'EU'
     with open(os.path.join(self.work_directory, name + '.json'),
               'w') as fo:
         json.dump(p, fo)
예제 #26
0
def _openfda_field(record, key, default=''):
    """
    Look up ``key`` on a result record, falling back to its 'openfda' section.

    :param record: one entry of the 'results' list
    :param key: field name to read
    :param default: value returned when neither place has the key
    :return: the top-level value if present, else the 'openfda' value,
        else ``default``
    """
    if key in record:
        return record[key]
    return (record.get('openfda') or {}).get(key, default)


def main():
    """
    Load the 510K result file and register every entry.

    For each entry of ``results`` a product and an applicant company are
    built and sent with ``add_record('entity', ...)``; when that succeeds an
    'Applicant' relationship between the two is added as well. Failures are
    logged and do not stop the run.
    """
    product_code = get_product_code()
    log = create_logger('510K')
    path = os.path.expanduser('~/work/fda/device-classification-0001-of-0001.json')
    # Close the input file as soon as it has been read.
    with open(path, 'r') as fi:
        result = json.load(fi)
    date_format = "%a, %d %b %Y %H:%M:%S GMT"
    log.critical(datetime.datetime.now())
    for r in result['results']:
        p = create_product()
        p['name'] = _openfda_field(r, 'device_name')
        p['ref'] = _openfda_field(r, 'k_number')
        p['addr']['line1'] = _openfda_field(r, 'address_1')
        p['addr']['line2'] = _openfda_field(r, 'address_2')
        p['addr']['city'] = _openfda_field(r, 'city')
        p['addr']['state'] = _openfda_field(r, 'state')
        p['addr']['zip'] = _openfda_field(r, 'zip_code')
        p['addr']['country'] = _openfda_field(r, 'country_code')
        p['intro'] = _openfda_field(r, 'statement_or_summary')
        p['asset']['type'] = 0
        committee = _openfda_field(r, 'advisory_committee_description')
        specialty = _openfda_field(r, 'medical_specialty_description')
        p['tag'] = [committee, specialty, 'FDA', 'Medical Device', '510K']
        # p['tag'] is used for tags readable to common users, p['lic'] is used for tags specified for product.
        p['asset']['lic'] = [
            'FDA',
            '510K',
            _openfda_field(r, 'clearance_type'),
            committee,
            specialty,
            _openfda_field(r, 'product_code'),
            _openfda_field(r, 'regulation_number'),
            _openfda_field(r, 'decision_description'),
        ]
        p['asset']['lic'].extend(third_party(_openfda_field(r, 'third_party_flag')))
        if _openfda_field(r, 'expedited_review_flag'):
            p['asset']['lic'].append('Expedited Review')
        submission_id = _openfda_field(r, 'submission_type_id')
        if submission_id not in {'1', '2'}:
            submission = submission_type(submission_id)
            if submission is not None:
                p['asset']['lic'].append(submission)
                p['tag'].append(submission)
        code = product_code.get(_openfda_field(r, 'product_code'))
        if code is not None:
            p['abs'] = code['device_name']
            p['asset']['lic'].extend([
                'Class ' + code['device_class'],
                # Only an explicit 'Y' marks the device as exempt.
                'GMP Exempt' if code['gmp_exempt_flag'] == 'Y' else 'GMP Required',
            ])
            p['tag'].append('Class ' + code['device_class'])
            if code['implant_flag'] != 'N':
                p['asset']['lic'].append('Implant')
                p['tag'].append('Implant')
            if code['life_sustain_support_flag'] != 'N':
                p['asset']['lic'].append('Life Sustain Support')
                p['tag'].append('Life Sustain Support')
        else:
            p['abs'] = p['name']
        p['asset']['stat'] = map_status(_openfda_field(r, 'decision_code'))
        for target, date_key in (('created', 'date_received'),
                                 ('updated', 'decision_date')):
            try:
                p[target] = parser.parse(
                    _openfda_field(r, date_key, None)).strftime(date_format)
            except (TypeError, ValueError, OverflowError):
                # Best effort: a missing or unparseable date is left unset.
                pass
        p['asset']['lic'] = remove_empty_string_from_array(p['asset']['lic'])
        p['tag'] = remove_empty_string_from_array(p['tag'])
        a = create_company()
        a['name'] = _openfda_field(r, 'applicant')
        a['abs'] = 'A Medical Device Company'
        a['addr'] = p['addr']
        a['tag'] = p['tag']
        a['group']['parentId'] = '000000000000000000000000'
        # contact is just the name of contact

        response = add_record('entity', [p, a])
        if response['_status'] != 'OK':
            log.error('fail to create record for {}'.format(p['name']))
            continue
        # _items[1] is the applicant, _items[0] the product, in the order sent.
        applicant_product = create_relationship(response['_items'][1]['_id'], response['_items'][0]['_id'])
        applicant_product['type'] = 7
        applicant_product['name'] = 'Applicant'
        applicant_product['abs'] = 'Applicant'
        response = add_record('relationship', [applicant_product])
        if response['_status'] != 'OK':
            log.error('fail to create relationship for {}'.format(p['name']))
        else:
            log.debug('added {} to the system'.format(p['name']))
    log.critical(datetime.datetime.now())
예제 #27
0
    def parse(self, response):
        """
        Store the raw page and the medicine/applicant records built for it.

        The response body is written to ``<name>.html`` and the product and
        applicant records to ``<name>.json`` inside ``self.work_directory``,
        where ``name`` is the last path segment of the response URL. Field
        values come from ``self.data[response.url]``.

        :param response: response of a medicine page
        """
        name = response.url.split('/')[-1]
        with open(os.path.join(self.work_directory, name + '.html'),
                  'wb') as fo:
            fo.write(response.body)
        data = self.data[response.url]
        p = create_product()
        p['name'] = data['Medicine name']
        p['abs'] = data[
            'International non-proprietary name (INN) / common name']
        p['tag'] = [data['Category'] + ' Medicine', 'EU', 'Drug']
        if data['Therapeutic area'] is not None:
            p['tag'].extend(data['Therapeutic area'].split(", "))
        p['contact']['website'] = response.url
        p['ref'] = response.url
        # (field, value that triggers the tag, tag); each tag is added at
        # most once.
        flag_tags = (
            ('Patient safety', 'no', 'Patient Risk'),
            ('Additional monitoring', 'yes', 'Additional monitoring'),
            ('Generic', 'yes', 'Generic'),
            ('Biosimilar', 'yes', 'Biosimilar'),
            ('Conditional approval', 'yes', 'Conditional approval'),
            ('Exceptional circumstances', 'yes', 'Exceptional circumstances'),
            ('Accelerated assessment', 'yes', 'Accelerated assessment'),
            ('Orphan medicine', 'yes', 'Orphan medicine'),
        )
        for field, trigger, tag in flag_tags:
            if data[field] == trigger:
                p['tag'].append(tag)
        if data['Species']:
            p['tag'].append('Species')
        p['updated'] = data['Decision date']
        p['created'] = data['First published']
        # 'lic' refers to the same list object as 'tag'.
        p['asset']['lic'] = p['tag']
        p['asset']['stat'] = self.status_code(data['Authorisation status'])
        p['asset']['type'] = 1
        p['intro'] = '\n'.join(
            response.xpath(
                "//div[contains(@class, 'field-name-field-ema-web-summary')]/div/div/div/p/text()"
            ).getall())
        p['asset']['market'] = dictionary_to_markdown(
            self.extract_market(response))
        p['asset']['tech'] = dictionary_to_markdown({
            key: data[key]
            for key in ('Active substance', 'Revision number',
                        'ATCvet code', 'First published',
                        'Revision date', 'Decision date', 'Date of opinion',
                        'Marketing authorisation date',
                        'Date of refusal of marketing authorisation',
                        'ATC code', 'Product number',
                        'Human pharmacotherapeutic group',
                        'Vet pharmacotherapeutic group',
                        'Condition / indication')
        })

        a = create_company()
        a['name'] = data['Marketing authorisation holder/company name']
        a['abs'] = 'A Medicine Company'
        a['addr'] = self.extract_address(response)
        a['entr']['bp'] = response.xpath(
            "//div[contains(@class, 'field-name-ema-medicine-all-author-pres-docs')]/div/div/div/section/div/div/div/div/ul/li/a/@href"
        ).get()
        # The product shares the applicant's address.
        p['addr'] = a['addr']
        with open(os.path.join(self.work_directory, name + '.json'),
                  'w') as fo:
            json.dump({'product': p, 'applicant': a}, fo)
예제 #28
0
def parse_html(file):
    """
    Parse one trial page into product, applicant and investigator records.

    :param file: source handed to ``etree.parse`` (read with an HTML parser)
    :return: dict with the keys 'product', 'applicant' and
        'principal_investigator'
    """
    document = etree.parse(file, etree.HTMLParser())
    product = create_product()
    # NOTE(review): the 'cn' result is used as the English table and the 'en'
    # result as the Chinese one; the keys read below rely on that pairing —
    # confirm against parse() before changing it.
    data_english = parse(document, 'cn')
    data_chinese = parse(document, 'en')
    product['name'] = data_chinese['注册题目']
    product['abs'] = data_chinese['研究目的']
    product['asset']['stat'] = map_status(data_english['Recruiting status'])
    product['intro'] = data_chinese['药物成份或治疗方案详述']
    href = document.xpath("//body/div[4]/div[2]/a")
    product['ref'] = 'http://www.chictr.org.cn/' + (href[0].attrib['href']
                                                    if len(href) > 0 else '')

    # Each field is tagged once, in Chinese and in English.
    product['tag'].extend([
        data_chinese['研究疾病'],
        data_english['Target disease'],
        data_chinese['研究疾病代码'],
        data_english['Target disease code'],
        data_chinese['研究类型'],
        data_english['Study type'],
        data_chinese['研究所处阶段'],
        data_english['Study phase'],
    ])
    product['tag'] = remove_empty_string(product['tag'])

    product['asset']['lic'].extend([
        data_chinese['研究课题代号(代码)'],
        data_chinese['注册号'],
        data_chinese['伦理委员会批件文号'],
    ])
    product['asset']['lic'] = remove_empty_string(product['asset']['lic'])

    product['asset']['type'] = 2
    for target, date_key in (('created', 'Date of Registration'),
                             ('updated', 'Date of Last Refreshed on')):
        try:
            product[target] = parser.parse(data_english[date_key]).strftime(
                "%a, %d %b %Y %H:%M:%S GMT")
        except (KeyError, TypeError, ValueError, OverflowError):
            # Best effort: a missing or unparseable date is left unset.
            pass
    product['asset']['tech'] = dictionary_to_markdown(data_english, [
        'Study design', 'Inclusion criteria', 'Exclusion criteria',
        'Study execute time', 'Interventions',
        'Countries of recruitment and research settings', 'Outcomes',
        'Collecting sample(s) from participants', 'Participant age', 'Gender',
        'Randomization Procedure (please state who generates the random number sequence and by what method)',
        'Blinding', 'The time of sharing IPD',
        'The way of sharing IPD”(include metadata and protocol, If use web-based public database, please provide the url)',
        'Data collection and Management (A standard data collection and management system include a CRF and an electronic data capture',
        'Data Managemen Committee'
    ])
    product['asset']['tech'] += dictionary_to_markdown(data_chinese, [
        '研究设计', '纳入标准', '排除标准', '研究实施时间', '干预措施', '研究实施地点', '测量指标', '采集人体标本',
        '年龄范围', '性别', '随机方法(请说明由何人用什么方法产生随机序列)', '盲法', '原始数据公开时间',
        '共享原始数据的方式(说明:请填入公开原始数据日期和方式,如采用网络平台,需填该网络平台名称和网址)',
        '数据采集和管理(说明:数据采集和管理由两部分组成,一为病例记录表(Case Record Form, CRF),二为电子采集和管理系统(Electronic Data Capture, EDC),如ResMan即为一种基于互联网的EDC',
        '数据管理委员会'
    ])

    applicant = create_user()
    applicant['name'] = data_chinese['申请注册联系人']
    applicant['abs'] = 'Applicant'
    applicant['contact']['phone'] = data_chinese['申请注册联系人电话']
    applicant['contact']['email'] = data_chinese['申请注册联系人电子邮件']
    applicant['contact']['website'] = data_chinese['申请单位网址(自愿提供)']
    applicant['addr'] = parse_address(data_english['Applicant address'])
    applicant['addr']['zip'] = data_chinese['申请注册联系人邮政编码']
    applicant['exp']['exp']['company'] = data_chinese['申请人所在单位']
    principal_investigator = create_user()
    principal_investigator['name'] = data_chinese['研究负责人']
    principal_investigator['abs'] = 'Principal Investigator'
    principal_investigator['contact']['phone'] = data_chinese['研究负责人电话']
    principal_investigator['contact']['email'] = data_chinese['研究负责人电子邮件']
    principal_investigator['contact']['website'] = data_chinese[
        '研究负责人网址(自愿提供)']
    principal_investigator['addr'] = parse_address(
        data_english["Study leader's address"])
    principal_investigator['addr']['zip'] = data_chinese['研究负责人邮政编码']

    # The product gets its own copy of the applicant's address.
    product['addr'] = copy.deepcopy(applicant['addr'])
    return {
        'product': product,
        'applicant': applicant,
        'principal_investigator': principal_investigator
    }
예제 #29
0
        def process_one_article(_, article):
            """
            Turn one article record into a product and append it to ``result``.

            :param _: unused
            :param article: mapping holding the 'MedlineCitation' and
                'PubmedData' sections of one article
            :return: False when 'MedlineCitation' is missing, True otherwise
                (including when no usable title is found and nothing is added)
            """
            if 'MedlineCitation' not in article:
                return False
            citation = article['MedlineCitation']

            def stamp(date):
                # Render a Year/Month/Day mapping in the record date format.
                return datetime(
                    int(date['Year']), int(date['Month']),
                    int(date['Day'])).strftime("%a, %d %b %Y %H:%M:%S GMT")

            p = create_product()
            p['ref'] = citation['PMID']["#text"]
            if 'DateCompleted' in citation:
                p['created'] = stamp(citation['DateCompleted'])
            if 'DateRevised' in citation:
                revised = stamp(citation['DateRevised'])
                # The revision date is the update time; it only stands in for
                # 'created' when no completion date is available.
                p['updated'] = revised
                if 'DateCompleted' not in citation:
                    p['created'] = revised
            content = citation['Article']
            p['name'] = content['ArticleTitle']
            if isinstance(p['name'], dict):
                if "#text" in p['name']:
                    p['name'] = p['name']['#text']
                elif "b" in p['name']:
                    p['name'] = p['name']["b"]
                else:
                    # No usable title: skip the article.
                    return True
            p['asset']['type'] = 4

            if 'Abstract' in content:
                self._abstract(content['Abstract'], p)
            if 'CoiStatement' in citation:
                coi = citation['CoiStatement']
                if isinstance(coi, dict):
                    if 'b' in coi:
                        if isinstance(coi['b'], list):
                            p['intro'] += '\n'.join(coi['b'])
                        else:
                            p['intro'] += coi['b']
                    elif '#text' in coi:
                        p['intro'] += coi['#text']
                elif isinstance(coi, str):
                    p['intro'] += coi

            authors = []
            if 'AuthorList' in content:
                authors.extend(
                    self._authors(content['AuthorList']['Author']))
            if 'InvestigatorList' in citation:
                authors.extend(
                    self._authors(
                        citation['InvestigatorList']['Investigator']))

            if 'MeshHeadingList' in citation:
                p['tag'].extend(
                    self._text_from_list_or_dict(
                        citation['MeshHeadingList']['MeshHeading'],
                        "DescriptorName"))
            if 'SupplMeshList' in citation:
                p['tag'].extend(
                    self._text_from_list_or_dict(
                        citation['SupplMeshList']['SupplMeshName']))

            p['asset']['ind'].extend(
                self._text_from_list_or_dict(
                    content['PublicationTypeList']['PublicationType']))
            if 'GeneSymbolList' in citation:
                p['asset']['ind'].extend(
                    self._text_from_list_or_dict(
                        citation['GeneSymbolList']['GeneSymbol']))
            if 'ChemicalList' in citation:
                p['asset']['ind'].extend(
                    self._text_from_list_or_dict(
                        citation['ChemicalList']['Chemical'],
                        'NameOfSubstance'))
            if 'KeywordList' in citation:
                p['asset']['ind'].extend(
                    self._text_from_list_or_dict(
                        citation['KeywordList']['Keyword']))

            # TODO: journal information from article['MedlineCitation']['MedlineJournalInfo']
            p['asset']['lic'] = self._text_from_list_or_dict(
                article['PubmedData']['ArticleIdList']['ArticleId'])
            if 'OtherID' in article['PubmedData']:
                p['asset']['lic'].extend(
                    self._text_from_list_or_dict(
                        article['PubmedData']['OtherID']))

            if 'ReferenceList' in article['PubmedData']:
                references = self._references(
                    article['PubmedData']['ReferenceList'])
            else:
                references = []
            result.append({
                "article": p,
                "reference": references,
                "author": authors
            })
            return True