def test_normalize_document_identifier2(self):
    """
    "US5260728" and "USD5260728" must come back from
    normalize_document_identifier unchanged.
    """
    for identifier in ("US5260728", "USD5260728"):
        normalized = xml_util.normalize_document_identifier(identifier)
        # assertEqual shows both values on failure; assertTrue(a == b)
        # would only report that False is not true.
        self.assertEqual(normalized, identifier)
Exemple #2
0
 def test_normalize_document_identifier3(self):
     """
     "D0123456" and "D123456" must both normalize to "D123456".
     """
     for raw in ("D0123456", "D123456"):
         normalized = xml_util.normalize_document_identifier(raw)
         # assertEqual already prints the mismatching value, so the
         # explicit message argument is no longer needed.
         self.assertEqual(normalized, "D123456")
 def test_normalize_document_identifier3(self):
     """
     Check that "D0123456" and "D123456" are both normalized to
     "D123456".
     """
     expected = "D123456"
     # assertEqual reports the actual and expected values on failure,
     # which assertTrue(a == b, a) only did through its message argument.
     self.assertEqual(
         xml_util.normalize_document_identifier("D0123456"), expected)
     self.assertEqual(
         xml_util.normalize_document_identifier("D123456"), expected)
Exemple #4
0
 def test_normalize_document_identifier2(self):
     """
     Check that "US5260728" and "USD5260728" pass through
     normalize_document_identifier unchanged.
     """
     # assertEqual is used instead of assertTrue(a == b) so that a failure
     # shows which value was actually returned.
     self.assertEqual(
         xml_util.normalize_document_identifier("US5260728"), "US5260728")
     self.assertEqual(
         xml_util.normalize_document_identifier("USD5260728"), "USD5260728")
    def __init__(self, filename, is_string=False):
        """
        Run the SAX parser over the input and copy the bibliographic
        fields of the us_patent_grant document onto this instance.

        filename: source handed to the parser, or the XML text itself
            when is_string is true.
        is_string: if true, `filename` is wrapped in a cStringIO buffer
            before being parsed.
        """
        content_handler = xml_driver.XMLHandler()
        sax_parser = xml_driver.make_parser()
        sax_parser.setContentHandler(content_handler)
        # the external-general-entities feature is switched off before parsing
        sax_parser.setFeature(xml_driver.handler.feature_external_ges, False)
        content_handler.setDocumentLocator(xml.sax.xmlreader.Locator())
        source = cStringIO.StringIO(filename) if is_string else filename
        sax_parser.parse(source)

        grant = content_handler.root.us_patent_grant
        self.xml = grant.us_bibliographic_data_grant

        # fields read from the publication reference
        publication = self.xml.publication_reference
        self.country = publication.contents_of('country')[0]
        doc_number = publication.contents_of('doc_number')[0]
        self.patent = xml_util.normalize_document_identifier(doc_number)
        self.kind = publication.contents_of('kind')[0]
        self.date_grant = publication.contents_of('date')[0]

        # fields read from the application reference
        application = self.xml.application_reference
        self.pat_type = application[0].get_attribute('appl-type')
        self.date_app = application.contents_of('date')[0]
        self.country_app = application.contents_of('country')[0]
        self.patent_app = application.contents_of('doc_number')[0]

        self.code_app = self.xml.contents_of('us_application_series_code')[0]
        self.clm_num = self.xml.contents_of('number_of_claims')[0]
        self.classes = self._classes()
        self.abstract = grant.abstract.contents_of('p', '')

        # the remaining fields are produced by helper methods of this class
        self.invention_title = self._invention_title()
        self.asg_list = self._asg_list()
        self.cit_list = self._cit_list()
        self.rel_list = self._rel_list()
        self.inv_list = self._inv_list()
        self.law_list = self._law_list()
    def __init__(self, xml_string, filename, is_string=False):
        """
        Parse a patent_application_publication document and fill in the
        instance attributes plus the `app` summary dict.

        xml_string: the XML text when is_string is true, otherwise the
            source handed straight to the parser. It is also searched
            with a regex to extract the abstract.
        filename: name of the file the document came from; the part
            matching 'i?pa[0-9]*.*$' is stored as self.filename.
        is_string: if true, xml_string is wrapped in StringIO for parsing.

        Raises AttributeError if `filename` does not match that pattern,
        because re.search then returns None.
        """
        xh = xml_driver.XMLHandler()
        parser = xml_driver.make_parser()
        parser.setContentHandler(xh)
        parser.setFeature(xml_driver.handler.feature_external_ges, False)
        locator = xml.sax.xmlreader.Locator()
        xh.setDocumentLocator(locator)
        if is_string:
            parser.parse(StringIO(xml_string))
        else:
            parser.parse(xml_string)

        self.attributes = ['app','application','assignee_list','inventor_list',
                          'us_classifications',
                          'claims']

        self.xml = xh.root.patent_application_publication
        self.xml_string = xml_string

        # Build the candidates once, as a real list: filter() was evaluated
        # twice here, and on Python 3 it returns an always-truthy iterator
        # that cannot be indexed.
        country_codes = [code for code in self.xml.contents_of('country_code')
                         if not isinstance(code, list)]
        self.country = country_codes[0] if country_codes else ''
        self.application = xml_util.normalize_document_identifier(self.xml.document_id.contents_of('doc_number')[0])
        self.kind = self.xml.document_id.contents_of('kind_code')[0]
        try:
            self.pat_type = type_kind[self.kind]
        except (LookupError, TypeError):
            # unknown (or unhashable) kind code: leave the type unset
            self.pat_type = None
        self.date_app = self.xml.document_id.contents_of('document_date')[0]
        self.clm_num = len(self.xml.subdoc_claims.claim)
        # The abstract is cut out of the raw text with a regex instead of
        # being read from the parsed tree.
        try:
            abstract = re.search('<subdoc-abstract>(.*?)</subdoc-abstract>',
                                 xml_string, re.DOTALL).group(1)
            abstract = re.sub('<.*?>|</.*?>', '', abstract)
            abstract = re.sub('[\n\t\r\f]+', '', abstract)
            abstract = re.sub(r'\s+', ' ', abstract)
            self.abstract = h.unescape(abstract)
        except Exception:
            # Best effort: any extraction failure yields an empty abstract.
            # `Exception` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            self.abstract = ''
        self.invention_title = h.unescape(self._invention_title())
        self.filename = re.search('i?pa[0-9]*.*$',filename,re.DOTALL).group()

        self.app = {
            "id": self.application,
            "type": self.pat_type,
            "number": self.application,
            "country": self.country,
            "date": self._fix_date(self.date_app),
            "abstract": self.abstract,
            "title": self.invention_title,
            "kind": self.kind,
            "num_claims": self.clm_num,
            "filename": self.filename
        }
        # the final id is "<first four characters of the fixed date>/<number>"
        self.app["id"] = str(self.app["date"])[:4] + "/" + self.app["number"]
    def __init__(self, xml_string, filename, is_string=False):
        """
        Parse a us_patent_grant document and populate the instance
        attributes together with the `pat` and `app` summary dicts.

        xml_string: XML text when is_string is true, otherwise the source
            passed directly to the parser.
        filename: originating file name; the part from 'ipg' to the end
            is kept as self.filename.
        is_string: if true, xml_string is wrapped in StringIO for parsing.
        """
        content_handler = xml_driver.XMLHandler()
        sax_parser = xml_driver.make_parser()

        sax_parser.setContentHandler(content_handler)
        sax_parser.setFeature(xml_driver.handler.feature_external_ges, False)
        content_handler.setDocumentLocator(xml.sax.xmlreader.Locator())
        sax_parser.parse(StringIO(xml_string) if is_string else xml_string)

        self.attributes = ['pat','app','assignee_list','patent','inventor_list','lawyer_list',
                     'us_relation_list','us_classifications','ipcr_classifications',
                     'citation_list','claims']

        grant = content_handler.root.us_patent_grant
        self.xml = grant
        self.xml_string = xml_string

        # publication reference fields
        publication = self.xml.publication_reference
        self.country = publication.contents_of('country', upper=False)[0]
        doc_number = publication.contents_of('doc_number')[0]
        self.patent = xml_util.normalize_document_identifier(doc_number)
        self.kind = publication.contents_of('kind')[0]
        self.date_grant = publication.contents_of('date')[0]

        # application reference fields
        application = self.xml.application_reference
        self.pat_type = None
        if application:
            self.pat_type = application[0].get_attribute('appl-type', upper=False)
        self.date_app = application.contents_of('date')[0]
        self.country_app = application.contents_of('country')[0]
        self.patent_app = application.contents_of('doc_number')[0]

        self.code_app = self.xml.contents_of('us_application_series_code')[0]
        self.clm_num = self.xml.contents_of('number_of_claims')[0]
        abstract_text = grant.abstract.contents_of('p', '', as_string=True, upper=False)
        self.abstract = h.unescape(abstract_text)
        self.invention_title = h.unescape(self._invention_title())
        filename_match = re.search('ipg.*$',filename,re.DOTALL)
        self.filename = filename_match.group()

        self.pat = {
            "id": self.patent,
            "type": self.pat_type,
            "number": self.patent,
            "country": self.country,
            "date": self._fix_date(self.date_grant),
            "abstract": self.abstract,
            "title": self.invention_title,
            "kind": self.kind,
            "num_claims": self.clm_num,
            "filename": self.filename
        }
        application_summary = {
            "type": self.code_app,
            "number": self.patent_app,
            "country": self.country_app,
            "date": self._fix_date(self.date_app)
        }
        # id is "<first four characters of the fixed date>/<number>"
        application_summary["id"] = (str(application_summary["date"])[:4]
                                     + "/" + application_summary["number"])
        self.app = application_summary
 def _get_doc_info(self, root):
     """
     Build a dict describing the document referenced by `root`.

     root: element offering contents_of(tag).

     The result has the keys 'country', 'kind' and 'date' (first value
     found for that tag, or '' when nothing is found) and 'number' (the
     first doc_number run through xml_util.normalize_document_identifier).
     A missing doc_number is not guarded against.
     """
     def first_or_blank(tag):
         values = root.contents_of(tag)
         if values:
             return values[0]
         return ''

     info = dict((tag, first_or_blank(tag))
                 for tag in ('country', 'kind', 'date'))
     raw_number = root.contents_of('doc_number')[0]
     info['number'] = xml_util.normalize_document_identifier(raw_number)
     return info
 def citation_list(self):
     """
     Collect the cited references as [regular, other].

     A citation that has an othercit element becomes an "other" entry:
       text, sequence, uuid
     Every remaining citation becomes a regular entry:
       kind, category, date, country, number, sequence, uuid

     Entries whose extracted fields are all empty are dropped. The two
     lists are numbered independently, each starting at 0. When there are
     no citations at all the result is [[], []].
     """
     cited = self.xml.references_cited.citation
     if not cited:
         return [[], []]

     patent_refs, other_refs = [], []
     for entry in cited:
         if entry.othercit:
             record = {
                 'text': entry.contents_of('othercit',
                                           as_string=True,
                                           upper=False),
             }
             bucket = other_refs
         else:
             record = dict(
                 (tag, entry.contents_of(tag, as_string=True, upper=False))
                 for tag in ('kind', 'category'))
             record['date'] = self._fix_date(
                 entry.contents_of('date', as_string=True))
             record['country'] = entry.contents_of('country',
                                                   default=[''])[0]
             record['number'] = xml_util.normalize_document_identifier(
                 entry.contents_of('doc_number', as_string=True))
             bucket = patent_refs

         # skip citations that yielded nothing at all
         if not any(record.values()):
             continue
         # the position in its own list doubles as the sequence number
         record['sequence'] = len(bucket)
         record['uuid'] = str(uuid.uuid1())
         bucket.append(record)
     return [patent_refs, other_refs]
Exemple #10
0
    def __init__(self, xml_string, is_string=False):
        """
        Parse a patent_application_publication document and fill in the
        instance attributes plus the `app` summary dict.

        xml_string: XML text when is_string is true, otherwise the source
            passed directly to the parser.
        is_string: if true, xml_string is wrapped in StringIO for parsing.
        """
        xh = xml_driver.XMLHandler()
        parser = xml_driver.make_parser()

        parser.setContentHandler(xh)
        parser.setFeature(xml_driver.handler.feature_external_ges, False)
        locator = xml.sax.xmlreader.Locator()
        xh.setDocumentLocator(locator)
        if is_string:
            parser.parse(StringIO(xml_string))
        else:
            parser.parse(xml_string)

        self.attributes = [
            'app', 'application', 'assignee_list', 'inventor_list',
            'us_classifications', 'claims'
        ]

        self.xml = xh.root.patent_application_publication

        # Build the candidates once, as a real list: filter() was evaluated
        # twice here, and on Python 3 it returns an always-truthy iterator
        # that cannot be indexed.
        country_codes = [code for code in self.xml.contents_of('country_code')
                         if not isinstance(code, list)]
        self.country = country_codes[0] if country_codes else ''
        self.application = xml_util.normalize_document_identifier(
            self.xml.application_number.contents_of('doc_number')[0])
        self.kind = self.xml.document_id.contents_of('kind_code')[0]
        self.pat_type = None
        self.date_app = self.xml.domestic_filing_data.contents_of(
            'filing_date')[0]
        self.clm_num = len(self.xml.subdoc_claims.claim)
        self.abstract = self.xml.subdoc_abstract.contents_of('paragraph',
                                                             '',
                                                             as_string=True,
                                                             upper=False)
        self.invention_title = self._invention_title()

        self.app = {
            "id": self.application,
            "type": self.pat_type,
            # Split the identifier after its first two characters. The
            # previous code joined application[2:] with itself, which
            # dropped the leading two characters and repeated the tail.
            # NOTE(review): presumably "series code/serial number" - confirm.
            "number": self.application[:2] + '/' + self.application[2:],
            "country": self.country,
            "date": self._fix_date(self.date_app),
            "abstract": self.abstract,
            "title": self.invention_title,
            "kind": self.kind,
            "num_claims": self.clm_num
        }
        # the final id uses the raw (unfixed) date and the unsplit identifier
        self.app["id"] = str(self.date_app)[:4] + '/' + self.application
Exemple #11
0
    def __init__(self, xml_string, is_string=False):
        """
        Parse a us_patent_application document and fill in the instance
        attributes plus the `app` summary dict.

        xml_string: XML text when is_string is true, otherwise the source
            passed directly to the parser.
        is_string: if true, xml_string is wrapped in StringIO for parsing.
        """
        xh = xml_driver.XMLHandler()
        parser = xml_driver.make_parser()

        parser.setContentHandler(xh)
        parser.setFeature(xml_driver.handler.feature_external_ges, False)
        locator = xml.sax.xmlreader.Locator()
        xh.setDocumentLocator(locator)
        if is_string:
            parser.parse(StringIO(xml_string))
        else:
            parser.parse(xml_string)

        self.attributes = [
            'app', 'application', 'assignee_list', 'inventor_list',
            'us_classifications', 'claims'
        ]

        self.xml = xh.root.us_patent_application

        self.country = self.xml.application_reference.contents_of(
            'country', upper=False)[0]
        self.application = xml_util.normalize_document_identifier(
            self.xml.application_reference.contents_of('doc_number')[0])
        self.kind = self.xml.publication_reference.contents_of('kind')[0]
        self.date_app = self.xml.application_reference.contents_of('date')[0]
        if self.xml.application_reference:
            self.pat_type = self.xml.application_reference[0].get_attribute(
                'appl-type', upper=False)
        else:
            self.pat_type = None
        self.clm_num = len(self.xml.claims.claim)
        self.abstract = self.xml.abstract.contents_of('p',
                                                      '',
                                                      as_string=True,
                                                      upper=False)
        self.invention_title = self._invention_title()

        self.app = {
            "id": self.application,
            "type": self.pat_type,
            # Split the identifier after its first two characters. The
            # previous code joined application[2:] with itself, which
            # dropped the leading two characters and repeated the tail.
            # NOTE(review): presumably "series code/serial number" - confirm.
            "number": self.application[:2] + '/' + self.application[2:],
            "country": self.country,
            "date": self._fix_date(self.date_app),
            "abstract": self.abstract,
            "title": self.invention_title,
            "kind": self.kind,
            "num_claims": self.clm_num
        }
        # the final id uses the raw (unfixed) date and the unsplit identifier
        self.app["id"] = str(self.date_app)[:4] + '/' + self.application
 def _get_doc_info(self, root):
     """
     Summarize the document referenced by `root` as a dict.

     root: element offering contents_of(tag).

     Returns a dict with the keys
       'country', 'kind', 'date' -- first value of country_code,
           kind_code and document_date respectively, or '' when the tag
           yields nothing or its first value is falsy
       'number' -- the first doc_number passed through
           xml_util.normalize_document_identifier

     A missing doc_number is not guarded against.
     """
     def first_value(tag):
         # Look the tag up once; it used to be fetched once for the
         # emptiness test and a second time for the value.
         values = root.contents_of(tag)
         return (values[0] if values else '') or ''

     res = {
         'country': first_value('country_code'),
         'kind': first_value('kind_code'),
         'date': first_value('document_date'),
     }
     res['number'] = xml_util.normalize_document_identifier(
         root.contents_of('doc_number')[0])
     return res
 def citation_list(self):
     """
     Return [regular_citations, other_citations].

     other citation (the citation has an othercit element):
       text, sequence, uuid
     regular citation (everything else):
       kind, category, date, country, number, sequence, uuid

     A citation whose extracted fields are all empty is skipped. Each of
     the two lists carries its own 0-based sequence numbering. With no
     citations at all, [[], []] is returned.
     """
     all_citations = self.xml.references_cited.citation
     if not all_citations:
         return [[], []]

     regular, others = [], []
     for cit in all_citations:
         if cit.othercit:
             fields = {'text': cit.contents_of('othercit', as_string=True, upper=False)}
             target = others
         else:
             fields = {}
             fields['kind'] = cit.contents_of('kind', as_string=True, upper=False)
             fields['category'] = cit.contents_of('category', as_string=True, upper=False)
             raw_date = cit.contents_of('date', as_string=True)
             fields['date'] = self._fix_date(raw_date)
             fields['country'] = cit.contents_of('country', default=[''])[0]
             raw_number = cit.contents_of('doc_number', as_string=True)
             fields['number'] = xml_util.normalize_document_identifier(raw_number)
             target = regular

         if any(fields.values()):
             # the length of the list so far is the next sequence number
             fields['sequence'] = len(target)
             fields['uuid'] = str(uuid.uuid4())
             target.append(fields)
     return [regular, others]
Exemple #14
0
 def _get_doc_info(self, root):
     """
     Describe the document referenced by `root` as a dict.

     root: element offering contents_of(tag).

     Returns a dict with 'country', 'kind' and 'date' (taken from the
     first country_code, kind_code and document_date value; '' when the
     tag yields nothing or the first value is falsy) and 'number' (the
     first doc_number passed through
     xml_util.normalize_document_identifier).

     A missing doc_number is not guarded against.
     """
     res = {}
     for key, tag in (('country', 'country_code'),
                      ('kind', 'kind_code'),
                      ('date', 'document_date')):
         # One lookup per tag; previously each tag was fetched twice
         # (once to test for emptiness, once to read the value).
         found = root.contents_of(tag)
         first = found[0] if found else ''
         res[key] = first if first else ''
     res['number'] = xml_util.normalize_document_identifier(
         root.contents_of('doc_number')[0])
     return res
    def __init__(self, xml_string, is_string=False):
        """
        Parse a us_patent_application document and populate the instance
        attributes together with the `app` summary dict.

        xml_string: XML text when is_string is true, otherwise the source
            passed directly to the parser.
        is_string: if true, xml_string is wrapped in StringIO for parsing.
        """
        content_handler = xml_driver.XMLHandler()
        sax_parser = xml_driver.make_parser()

        sax_parser.setContentHandler(content_handler)
        sax_parser.setFeature(xml_driver.handler.feature_external_ges, False)
        content_handler.setDocumentLocator(xml.sax.xmlreader.Locator())
        sax_parser.parse(StringIO(xml_string) if is_string else xml_string)

        self.attributes = ['app','application','assignee_list','inventor_list',
                      'us_classifications',
                     'claims']

        self.xml = content_handler.root.us_patent_application

        # country, number and kind all come from the publication reference
        publication = self.xml.publication_reference
        self.country = publication.contents_of('country', upper=False)[0]
        doc_number = publication.contents_of('doc_number')[0]
        self.application = xml_util.normalize_document_identifier(doc_number)
        self.kind = publication.contents_of('kind')[0]

        application = self.xml.application_reference
        self.pat_type = None
        if application:
            self.pat_type = application[0].get_attribute('appl-type', upper=False)

        self.date_app = self.xml.publication_reference.contents_of('date')[0]
        self.clm_num = len(self.xml.claims.claim)
        self.abstract = self.xml.abstract.contents_of('p', '', as_string=True, upper=False)
        self.invention_title = self._invention_title()

        summary = {
            "id": self.application,
            "type": self.pat_type,
            "number": self.application,
            "country": self.country,
            "date": self._fix_date(self.date_app),
            "abstract": self.abstract,
            "title": self.invention_title,
            "kind": self.kind,
            "num_claims": self.clm_num
        }
        # id is "<first four characters of the fixed date>/<number>"
        summary["id"] = str(summary["date"])[:4] + "/" + summary["number"]
        self.app = summary
    def __init__(self, xml_string, is_string=False):
        """
        Parse a patent_application_publication document and fill in the
        instance attributes plus the `app` summary dict.

        xml_string: XML text when is_string is true, otherwise the source
            passed directly to the parser.
        is_string: if true, xml_string is wrapped in StringIO for parsing.
        """
        xh = xml_driver.XMLHandler()
        parser = xml_driver.make_parser()

        parser.setContentHandler(xh)
        parser.setFeature(xml_driver.handler.feature_external_ges, False)
        locator = xml.sax.xmlreader.Locator()
        xh.setDocumentLocator(locator)
        if is_string:
            parser.parse(StringIO(xml_string))
        else:
            parser.parse(xml_string)

        self.attributes = ['app','application','assignee_list','inventor_list',
                          'us_relation_list','us_classifications','ipcr_classifications',
                          'claims']

        self.xml = xh.root.patent_application_publication

        # Build the candidates once, as a real list: filter() was evaluated
        # twice here, and on Python 3 it returns an always-truthy iterator
        # that cannot be indexed.
        country_codes = [code for code in self.xml.contents_of('country_code')
                         if not isinstance(code, list)]
        self.country = country_codes[0] if country_codes else ''
        self.application = xml_util.normalize_document_identifier(self.xml.document_id.contents_of('doc_number')[0])
        self.kind = self.xml.document_id.contents_of('kind_code')[0]
        self.pat_type = None
        self.date_app = self.xml.document_id.contents_of('document_date')[0]
        self.clm_num = len(self.xml.subdoc_claims.claim)
        self.abstract = self.xml.subdoc_abstract.contents_of('paragraph', '', as_string=True, upper=False)
        self.invention_title = self._invention_title()

        self.app = {
            "id": self.application,
            "type": self.pat_type,
            "number": self.application,
            "country": self.country,
            "date": self._fix_date(self.date_app),
            "abstract": self.abstract,
            "title": self.invention_title,
            "kind": self.kind,
            "num_claims": self.clm_num
        }
        # the final id is "<first four characters of the fixed date>/<number>"
        self.app["id"] = str(self.app["date"])[:4] + "/" + self.app["number"]
 def citation_list(self):
     """
     Returns a list of two lists. The first list is normal citations,
     the second is other citations.
     citation:
       date
       name
       kind
       country
       category
       number
       sequence
     OR
     otherreference:
       text
       sequence

     `sequence` is the citation's index among all citations, so the two
     lists share one numbering. When there are no citations the result
     is [[], []], keeping the two-list shape for callers that unpack it.
     """
     citations = self.xml.references_cited.citation
     if not citations:
         # This used to return a bare [], which broke the documented
         # "list of two lists" contract for the empty case.
         return [[], []]
     regular_cits = []
     other_cits = []
     for i, citation in enumerate(citations):
         data = {}
         if citation.othercit:
             data['text'] = citation.contents_of('othercit', as_string=True)
             data['sequence'] = i
             other_cits.append(data)
         else:
             for tag in ['name', 'kind', 'category']:
                 data[tag] = citation.contents_of(tag, as_string=True)
             data['date'] = self._fix_date(citation.contents_of('date', as_string=True))
             data['country'] = citation.contents_of('country', default=[''])[0]
             doc_number = citation.contents_of('doc_number', as_string=True)
             data['number'] = xml_util.normalize_document_identifier(doc_number)
             data['sequence'] = i
             regular_cits.append(data)
     return [regular_cits, other_cits]
    def __init__(self, xml_string, is_string=False):
        """
        Parse a us_patent_grant document and populate the instance
        attributes together with the `pat`, `app` and `description`
        summary dicts.

        xml_string: XML text when is_string is true, otherwise the source
            passed to the parser. It is first run through the
            description, claims and abstract patch helpers.
        is_string: if true, the patched input is wrapped in StringIO for
            parsing.
        """
        # pre-process the input with the three patch helpers, in this order
        for patch in (self._description_patch,
                      self._claims_patch,
                      self._abstract_patch):
            xml_string = patch(xml_string)

        content_handler = xml_driver.XMLHandler()
        self.xh = content_handler
        sax_parser = xml_driver.make_parser()

        sax_parser.setContentHandler(content_handler)
        sax_parser.setFeature(xml_driver.handler.feature_external_ges, False)
        content_handler.setDocumentLocator(xml.sax.xmlreader.Locator())
        sax_parser.parse(StringIO(xml_string) if is_string else xml_string)

        self.attributes = ['pat','app','description','assignee_list','patent','inventor_list','lawyer_list',
                     'us_relation_list','us_classifications','ipcr_classifications',
                     'citation_list','claims']

        grant = content_handler.root.us_patent_grant
        self.xml = grant

        # publication reference fields
        publication = self.xml.publication_reference
        self.country = publication.contents_of('country', upper=False)[0]
        doc_number = publication.contents_of('doc_number')[0]
        self.patent = xml_util.normalize_document_identifier(doc_number)
        self.kind = publication.contents_of('kind')[0]
        self.date_grant = publication.contents_of('date')[0]

        # application reference fields
        application = self.xml.application_reference
        self.pat_type = None
        if application:
            self.pat_type = application[0].get_attribute('appl-type', upper=False)
        self.date_app = application.contents_of('date')[0]
        self.country_app = application.contents_of('country')[0]
        self.patent_app = application.contents_of('doc_number')[0]

        self.code_app = self.xml.contents_of('us_application_series_code')[0]
        self.clm_num = self.xml.contents_of('number_of_claims')[0]
        self.abstract = grant.contents_of('abstract', '', as_string=True, upper=False)

        def raw_section(tag):
            # description sections are fetched with clean_text switched off
            return grant.contents_of(tag, '', as_string=True, upper=False,
                                     clean_text=False)

        self.briefsummarydescription = raw_section('brief_summary_description')
        self.briefdescriptiondrawings = raw_section('brief_description_drawings')
        self.detaileddescription = raw_section('detailed_description')
        self.otherpatentrelations = raw_section('other_patent_relations')

        self.invention_title = self._invention_title()

        self.pat = {
            "id": self.patent,
            "type": self.pat_type,
            "number": self.patent,
            "country": self.country,
            "date": self._fix_date(self.date_grant),
            "abstract": self.abstract,
            "title": self.invention_title,
            "kind": self.kind,
            "num_claims": self.clm_num
        }
        application_summary = {
            "type": self.code_app,
            "number": self.patent_app,
            "country": self.country_app,
            "date": self._fix_date(self.date_app)
        }
        # id is "<first four characters of the fixed date>/<number>"
        application_summary["id"] = (str(application_summary["date"])[:4]
                                     + "/" + application_summary["number"])
        self.app = application_summary

        self.description = {
            "id": self.patent,
            "briefsummarydescription": self.briefsummarydescription,
            "briefdescriptiondrawings": self.briefdescriptiondrawings,
            "detaileddescription": self.detaileddescription,
            "otherpatentrelations": self.otherpatentrelations
        }
    def __init__(self, xml_string, filename, is_string=False):
        """
        Parse a patent_application_publication document and fill in the
        instance attributes plus the `app` summary dict.

        xml_string: the XML text when is_string is true, otherwise the
            source handed straight to the parser. It is also searched
            with a regex to extract the abstract.
        filename: name of the file the document came from; the part
            matching 'i?pa[0-9]*.*$' is stored as self.filename.
        is_string: if true, xml_string is wrapped in StringIO for parsing.

        Raises AttributeError if `filename` does not match that pattern,
        because re.search then returns None.
        """
        xh = xml_driver.XMLHandler()
        parser = xml_driver.make_parser()
        parser.setContentHandler(xh)
        parser.setFeature(xml_driver.handler.feature_external_ges, False)
        locator = xml.sax.xmlreader.Locator()
        xh.setDocumentLocator(locator)
        if is_string:
            parser.parse(StringIO(xml_string))
        else:
            parser.parse(xml_string)

        self.attributes = [
            'app', 'application', 'assignee_list', 'inventor_list',
            'us_classifications', 'claims'
        ]

        self.xml = xh.root.patent_application_publication
        self.xml_string = xml_string

        # Build the candidates once, as a real list: filter() was evaluated
        # twice here, and on Python 3 it returns an always-truthy iterator
        # that cannot be indexed.
        country_codes = [code for code in self.xml.contents_of('country_code')
                         if not isinstance(code, list)]
        self.country = country_codes[0] if country_codes else ''
        self.application = xml_util.normalize_document_identifier(
            self.xml.document_id.contents_of('doc_number')[0])
        self.kind = self.xml.document_id.contents_of('kind_code')[0]
        try:
            self.pat_type = type_kind[self.kind]
        except (LookupError, TypeError):
            # unknown (or unhashable) kind code: leave the type unset
            self.pat_type = None
        self.date_app = self.xml.document_id.contents_of('document_date')[0]
        self.clm_num = len(self.xml.subdoc_claims.claim)
        # The abstract is cut out of the raw text with a regex instead of
        # being read from the parsed tree.
        try:
            abstract = re.search(
                '<subdoc-abstract>(.*?)</subdoc-abstract>', xml_string,
                re.DOTALL).group(1)
            abstract = re.sub('<.*?>|</.*?>', '', abstract)
            abstract = re.sub('[\n\t\r\f]+', '', abstract)
            abstract = re.sub(r'\s+', ' ', abstract)
            self.abstract = h.unescape(abstract)
        except Exception:
            # Best effort: any extraction failure yields an empty abstract.
            # `Exception` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            self.abstract = ''
        self.invention_title = h.unescape(self._invention_title())
        self.filename = re.search('i?pa[0-9]*.*$', filename, re.DOTALL).group()

        self.app = {
            "id": self.application,
            "type": self.pat_type,
            "number": self.application,
            "country": self.country,
            "date": self._fix_date(self.date_app),
            "abstract": self.abstract,
            "title": self.invention_title,
            "kind": self.kind,
            "num_claims": self.clm_num,
            "filename": self.filename
        }
        # the final id is "<first four characters of the fixed date>/<number>"
        self.app["id"] = str(self.app["date"])[:4] + "/" + self.app["number"]