def parse_text(self, doc, **kwargs):
        """Extract CUSIP, address, issue name and issuer name from document text.

        Each regex-derived field is looked up with a primary pattern and, if
        that finds nothing, a fallback pattern. When neither matches, a warning
        is logged and the field stays ``None``.

        Args:
            doc: Text to search.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            dict with the keys ``cusip``, ``address``, ``issue_name`` and
            ``issuer_name``. ``address`` is whatever ``parse_address(doc)``
            returns; the other values are strings or ``None``.
        """
        flags = re.IGNORECASE | re.MULTILINE

        # CUSIP: first try the value that precedes a "(CUSIP Number)" caption.
        cusip = None
        cusip_pattern = r"\w{4,6}(\s*|-*)\w{2,3}-*\w{0,1}"
        match = re.search(r"(" + cusip_pattern + r")\W+\(CUSIP Number\)", doc, flags)
        if match:
            # Group 1 is the whole CUSIP; the inner group is only the separator.
            cusip = match.group(1)
        else:
            # Fallback: an inline "CUSIP No./Number/Num" label followed by the value.
            match = re.search(r"cusip (no|number|num)\.*:*\W*(\w{6}\W*\w{3})", doc, flags)
            if match:
                cusip = match.group(2)
            else:
                logging.warning("No match for cusip")

        # Address
        address = parse_address(doc)

        # Issue: text between the "(Name of Issuer)" and
        # "(Title of Class of Securities)" captions.
        issue_name = None
        match = re.search(
            r"\(Name\s+of\s+Issuer\)([\w\W]+?)-*\s+\(Title\s+of\s+Class\s+of\s+Securities\)",
            doc,
            flags,
        )
        if match:
            issue_name = match.group(1).strip()
        else:
            # Fallback: labelled "Title of Class of Securities: ... CUSIP Number:" layout.
            match = re.search(
                r"Title\s+of\s+Class\s+of\s+Securities:\s+([\w\W]+?)\s+(Item 2\(e\)\.*)?\s+CUSIP\s+Number:",
                doc,
                flags,
            )
            if match:
                issue_name = match.group(1).strip()
            else:
                logging.warning("No match for issue name")

        # Issuer: text between the "(Amendment No. ...)" line and the
        # "(Name of Issuer)" caption.
        issuer_name = None
        match = re.search(
            r"\(Amendment\s+No\.*:*\s+[\w\W]*?\)\*?\s+(Under\s+the\s+Securities\s+Exchange\s+Act\s+of\s+1934)?([\w\W]+?)-*\(Name\s+of\s+Issuer\)",
            doc,
            flags,
        )
        if match:
            issuer_name = match.group(2).strip()
            # Drop a trailing rule of dashes. DOTALL is required so that "."
            # can cross newlines; without it an issuer name spanning several
            # lines would keep its trailing dashes.
            trimmed = re.match(r"(.*?)\s*-+$", issuer_name, re.DOTALL)
            if trimmed:
                issuer_name = trimmed.group(1).strip()
        else:
            # Fallback: "Item 1(a) Name of Issuer: ..." up to the next "Item".
            match = re.search(
                r"Item\s+1\(a\)\.*\s*-*\s*Name\s+of\s+Issuer:([\w\W]+?)Item",
                doc,
                flags,
            )
            if match:
                issuer_name = match.group(1).strip()
            else:
                logging.warning("No match for issuer name in SC13G/A")

        return {"cusip": cusip, "address": address, "issue_name": issue_name, "issuer_name": issuer_name}
    def parse_text(self, text, **kwargs):
        """Collect the CUSIP, address, issue name and issuer name found in *text*.

        The CUSIP, issue name and issuer name come from the ``extract_cusip``,
        ``extract_issue_name`` and ``extract_issuer_name`` methods of this
        object; the address comes from ``parse_address``. A warning is logged
        when the issuer name is falsy. ``**kwargs`` is accepted but not used.

        Returns:
            dict with the keys ``cusip``, ``address``, ``issue_name`` and
            ``issuer_name``.
        """
        # Dict displays evaluate their values top to bottom, so the lookups
        # run in the order cusip, address, issue name.
        fields = {
            'cusip': self.extract_cusip(text),
            'address': parse_address(text),
            'issue_name': self.extract_issue_name(text),
        }

        issuer = self.extract_issuer_name(text)
        if not issuer:
            logging.warning("No match for issuer name in SC13G/A")
        fields['issuer_name'] = issuer

        return fields
Example #3
0
    def parse_text(self, text, **kwargs):
        """Return the fields parsed out of *text* as a dictionary.

        Calls ``self.extract_cusip``, ``parse_address``,
        ``self.extract_issue_name`` and ``self.extract_issuer_name`` on *text*,
        in that order, and logs a warning if the issuer name comes back falsy.
        ``**kwargs`` is accepted but not used.

        Returns:
            dict with the keys ``cusip``, ``address``, ``issue_name`` and
            ``issuer_name``.
        """
        # Keyword arguments are evaluated left to right, which keeps the
        # extraction calls in their original order.
        parsed = dict(
            cusip=self.extract_cusip(text),
            address=parse_address(text),
            issue_name=self.extract_issue_name(text),
            issuer_name=self.extract_issuer_name(text),
        )

        if not parsed['issuer_name']:
            logging.warning("No match for issuer name in SC13G/A")

        return parsed
Example #4
0
    def parse_text(self, doc, **kwargs):
        """Extract CUSIP, address, issue name and issuer name from document text.

        Each regex-derived field is looked up with a primary pattern and, if
        that finds nothing, a fallback pattern. When neither matches, a warning
        is logged and the field stays ``None``.

        Args:
            doc: Text to search.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            dict with the keys ``cusip``, ``address``, ``issue_name`` and
            ``issuer_name``. ``address`` is whatever ``parse_address(doc)``
            returns; the other values are strings or ``None``.
        """
        flags = re.IGNORECASE | re.MULTILINE

        # CUSIP: first try the value that precedes a "(CUSIP Number)" caption.
        cusip = None
        cusip_pattern = r'\w{4,6}(\s*|-*)\w{2,3}-*\w{0,1}'
        match = re.search(r'(' + cusip_pattern + r')\W+\(CUSIP Number\)', doc, flags)
        if match:
            # Group 1 is the whole CUSIP; the inner group is only the separator.
            cusip = match.group(1)
        else:
            # Fallback: an inline "CUSIP No./Number/Num" label followed by the value.
            match = re.search(r'cusip (no|number|num)\.*:*\W*(\w{6}\W*\w{3})', doc, flags)
            if match:
                cusip = match.group(2)
            else:
                logging.warning("No match for cusip")

        # Address
        address = parse_address(doc)

        # Issue: text between the "(Name of Issuer)" and
        # "(Title of Class of Securities)" captions.
        issue_name = None
        match = re.search(
            r"\(Name\s+of\s+Issuer\)([\w\W]+?)-*\s+\(Title\s+of\s+Class\s+of\s+Securities\)",
            doc,
            flags)
        if match:
            issue_name = match.group(1).strip()
        else:
            # Fallback: labelled "Title of Class of Securities: ... CUSIP Number:" layout.
            match = re.search(
                r'Title\s+of\s+Class\s+of\s+Securities:\s+([\w\W]+?)\s+(Item 2\(e\)\.*)?\s+CUSIP\s+Number:',
                doc,
                flags)
            if match:
                issue_name = match.group(1).strip()
            else:
                logging.warning("No match for issue name")

        # Issuer: text between the "(Amendment No. ...)" line and the
        # "(Name of Issuer)" caption.
        issuer_name = None
        match = re.search(
            r"\(Amendment\s+No\.*:*\s+[\w\W]*?\)\*?\s+(Under\s+the\s+Securities\s+Exchange\s+Act\s+of\s+1934)?([\w\W]+?)-*\(Name\s+of\s+Issuer\)",
            doc,
            flags)
        if match:
            issuer_name = match.group(2).strip()
            # Drop a trailing rule of dashes. DOTALL is required so that "."
            # can cross newlines; without it an issuer name spanning several
            # lines would keep its trailing dashes.
            trimmed = re.match(r'(.*?)\s*-+$', issuer_name, re.DOTALL)
            if trimmed:
                issuer_name = trimmed.group(1).strip()
        else:
            # Fallback: "Item 1(a) Name of Issuer: ..." up to the next "Item".
            match = re.search(
                r'Item\s+1\(a\)\.*\s*-*\s*Name\s+of\s+Issuer:([\w\W]+?)Item',
                doc,
                flags)
            if match:
                issuer_name = match.group(1).strip()
            else:
                logging.warning("No match for issuer name in SC13G/A")

        return {
            'cusip': cusip,
            'address': address,
            'issue_name': issue_name,
            'issuer_name': issuer_name
        }