예제 #1
0
    def parse(self, response):
        """Load company header and market-data fields into a FinanceItem.

        Each field is filled from one XPath expression evaluated against
        ``response``. Every extracted string is passed through
        ``unicode.strip`` and then ``unicode.title`` before it is stored.

        Args:
            response: The response the ItemLoader extracts from.

        Returns:
            The item produced by ``ItemLoader.load_item()``.
        """
        loader = ItemLoader(item=FinanceItem(), response=response)

        header = '//*[@id="companyheader"]/div[1]'
        market_data = '//*[@id="market-data-div"]/div[2]/div[1]'
        # (item field, XPath) pairs, registered in this order.
        field_xpaths = (
            ("CompanyName", header + '/h3/text()'),
            ("StockExchangeAndCode", header + '/text()[1]'),
            ("Currency", '//*[@id="ref_6826782_elt"]/div/div/text()'),
            ("StockPrice", '//*[@id="ref_6826782_l"]/text()'),
            ("MarketCap", market_data + '/table[1]/tr[5]/td[2]/text()'),
            ("PE", market_data + '/table[1]/tr[6]/td[2]/text()'),
            ("EPS", market_data + '/table[2]/tr[2]/td[2]/text()'),
            ("Shares", market_data + '/table[2]/tr[3]/td[2]/text()'),
            ("InstOwn", market_data + '/table[2]/tr[5]/td[2]/text()'),
        )
        for field, xpath in field_xpaths:
            # A separate processor instance is built for every field.
            loader.add_xpath(field, xpath,
                             MapCompose(unicode.strip, unicode.title))

        return loader.load_item()
예제 #2
0
 def parse(self, response):
     """Extract a short description from the first matching paragraph.

     The description is the first two period-separated pieces of the
     first paragraph text that follows the first ``yfnc_modtitle1``
     table, joined without the period. It is stored on the item and
     also cached on ``self.risultato``.

     When the XPath matches nothing the description is an empty string,
     and when the paragraph contains no period the whole paragraph text
     is used, instead of raising ``IndexError``.

     Args:
         response: The response to extract from.

     Returns:
         A FinanceItem with its ``description`` field set.
     """
     # NOTE(review): HtmlXPathSelector/select() is the legacy selector API;
     # kept because the Scrapy version in use is not visible from here.
     sel = HtmlXPathSelector(response)
     torrent = FinanceItem()
     result = sel.select(
         "//table[@class='yfnc_modtitle1'][1]/following::p/text()").extract()

     # Guard against an empty match before indexing into the result.
     sentences = result[0].split('.') if result else []
     # Joining the slice tolerates a paragraph with fewer than two pieces,
     # and avoids the ASCII-only str() conversion of the first piece.
     description = ''.join(sentences[:2])

     torrent['description'] = description
     self.risultato = description
     return torrent
예제 #3
0
 def parse_item(self, response):
     """Yield a FinanceItem built from an article page, tagged 'sina'.

     ``content`` is a list of ``["p", text]`` pairs, one per paragraph
     text node matched under the article container. ``datetime`` holds
     the full list of matches (it is not indexed), while ``title`` is the
     first ``<title>`` text node.

     Args:
         response: The response to extract from.

     Yields:
         The populated FinanceItem.

     Raises:
         IndexError: If the page has no ``<title>`` text.
     """
     item = FinanceItem()
     paragraphs = response.xpath(
         '//div[@class="article article_16"]/p/text()').extract()
     item['content'] = [["p", paragraph] for paragraph in paragraphs]
     item['source'] = 'sina'
     # Stored as the whole list of matched strings, not the first match.
     item['datetime'] = response.xpath(
         '//div[@class="page-info"]/span[@class="time-source"]/text()'
     ).extract()
     titles = response.xpath("/html/head/title/text()").extract()
     item['title'] = titles[0]
     item['href'] = response.url
     item['type'] = 'sina'

     yield item
예제 #4
0
    def parse_item(self, response):
        """Yield a FinanceItem built from an article page, tagged 'eastmoney'.

        ``content`` is a list of ``["p", text]`` pairs, one per paragraph
        text node matched under ``div.Body``. ``datetime`` and ``title``
        are the first matches of their respective XPath expressions.

        Args:
            response: The response to extract from.

        Yields:
            The populated FinanceItem.

        Raises:
            IndexError: If the timestamp or title XPath matches nothing.
        """
        item = FinanceItem()
        paragraphs = response.xpath('//div[@class="Body"]/p/text()').extract()
        item['content'] = [["p", paragraph] for paragraph in paragraphs]
        item['source'] = 'eastmoney'

        timestamps = response.xpath('//div[@class="time"]/text()').extract()
        item['datetime'] = timestamps[0]
        headlines = response.xpath(
            '//div[@class="newsContent"]/h1/text()').extract()
        item['title'] = headlines[0]

        item['href'] = response.url
        item['type'] = 'eastmoney'

        yield item
예제 #5
0
    def parse_item(self, response):
        """Return a FinanceItem built from an article page, tagged 'qq'.

        ``content`` is a list of ``["p", text]`` pairs, one per paragraph
        text node matched under ``div.Cnt-Main-Article-QQ``. ``datetime``
        and ``title`` are the first matches of their XPath expressions.

        Args:
            response: The response to extract from.

        Returns:
            The populated FinanceItem (returned directly, not yielded).

        Raises:
            IndexError: If the timestamp or title XPath matches nothing.
        """
        item = FinanceItem()
        paragraphs = response.xpath(
            '//div[@class="Cnt-Main-Article-QQ"]/p/text()').extract()
        item['content'] = [["p", paragraph] for paragraph in paragraphs]
        item['source'] = 'qq'

        timestamps = response.xpath(
            '//div[@class="a_Info"]/span[@class="a_time"]/text()').extract()
        item['datetime'] = timestamps[0]
        titles = response.xpath("/html/head/title/text()").extract()
        item['title'] = titles[0]

        item['href'] = response.url
        item['type'] = 'qq'

        return item
예제 #6
0
    def parse_item(self, response):
        """Yield a FinanceItem built from an article page, tagged 'cnstock'.

        ``content`` is a list of ``["p", text]`` pairs, one per paragraph
        text node matched under ``div.content``. ``datetime`` and ``title``
        are the first matches of their respective XPath expressions.

        Args:
            response: The response to extract from.

        Yields:
            The populated FinanceItem.

        Raises:
            IndexError: If the timestamp or title XPath matches nothing.
        """
        item = FinanceItem()
        paragraphs = response.xpath('//div[@class="content"]/p/text()').extract()
        item['content'] = [["p", paragraph] for paragraph in paragraphs]
        item['source'] = 'cnstock'

        timestamps = response.xpath(
            '//div[@class="bullet"]/span[@class="timer"]/text()').extract()
        item['datetime'] = timestamps[0]
        titles = response.xpath("/html/head/title/text()").extract()
        item['title'] = titles[0]

        item['href'] = response.url
        item['type'] = 'cnstock'

        yield item
예제 #7
0
    def parse(self, response):
        """Load company header and key-ratio fields into a FinanceItem.

        The ratio fields are read from the third cell of consecutive rows
        of one table under ``#gf-viewc``. Every extracted string is passed
        through ``unicode.strip`` and then ``unicode.title``.

        Args:
            response: The response the ItemLoader extracts from.

        Returns:
            The item produced by ``ItemLoader.load_item()``.
        """
        l = ItemLoader(item=FinanceItem(), response=response)

        header = '//*[@id="companyheader"]/div[1]'
        ratios = '//*[@id="gf-viewc"]/div/div/div[3]/div[1]/div/div[4]/table'
        # (item field, XPath) pairs, registered in this order.
        field_xpaths = (
            ("CompanyName", header + '/h3/text()'),
            ("StockExchangeAndCode", header + '/text()[1]'),
            ("NetProfitMargin", ratios + '/tr[1]/td[3]/text()'),
            ("OperatingMargin", ratios + '/tr[2]/td[3]/text()'),
            # NOTE(review): no /text() here, so the whole <td> markup is
            # extracted -- left unchanged; confirm whether that is intended.
            ("EBITDMargin", ratios + '/tr[3]/td[3]'),
            ("ReturnOnAssets", ratios + '/tr[4]/td[3]/text()'),
            ("ReturnOnEquity", ratios + '/tr[5]/td[3]/text()'),
            # Row 6: this previously re-read row 5, which duplicated the
            # ReturnOnEquity value into Employees.
            ("Employees", ratios + '/tr[6]/td[3]/text()'),
            # NOTE(review): no /text() here either -- see EBITDMargin.
            ("CDPScore", ratios + '/tr[7]/td[3]'),
        )
        for field, xpath in field_xpaths:
            l.add_xpath(field, xpath, MapCompose(unicode.strip, unicode.title))

        return l.load_item()
예제 #8
0
    def parse_item(self, response):
        print response.url, '---------------------'
        item = FinanceItem()
        text = response.xpath('//p/text()').extract()
        content = []
        for t in text:
            content.append(["p", t])
        item['content'] = content
        item['source'] = '10jqka'
        item['datetime'] = response.xpath(
            '//div[@class="date"]/span/text()').extract()[0][:19]
        item['title'] = response.xpath("/html/head/title/text()").extract()[0]
        item['href'] = response.url
        item['type'] = '10jqka'

        yield item
예제 #9
0
    def parse(self, response):
        """Load the ``fs-table`` under ``div#incinterimdiv`` into a FinanceItem.

        The first two header cells fill ``Currency`` and ``TimePeriod``.
        Each remaining field is read from the second cell of one body row,
        rows 1 through 49 in order; some rows read the text of a ``<span>``
        nested in that cell rather than the cell's own text. Every extracted
        string is passed through ``unicode.strip`` and then ``unicode.title``.

        Args:
            response: The response the ItemLoader extracts from.

        Returns:
            The item produced by ``ItemLoader.load_item()``.
        """
        loader = ItemLoader(item=FinanceItem(), response=response)

        table = '//div[@id="incinterimdiv"]//*[@id="fs-table"]'
        direct = '/text()'
        in_span = '/span/text()'

        # One entry per body row, in row order (row number = position + 1).
        body_rows = (
            ("Revenue", direct),
            ("OtherRevenueTotal", direct),
            ("TotalRevenue", direct),
            ("CostOfRevenueTotal", direct),
            ("GrossProfit", direct),
            ("SellingGeneralAdminExpensesTotal", direct),
            ("ResearchAndDevelopment", direct),
            ("DepreciationAmortization", direct),
            ("InterestExpenseIncome", direct),
            ("UnusualExpenseIncome", direct),
            ("OtherOperatingExpenses", direct),
            ("TotalOperatingExpenses", direct),
            ("OperatingIncome", in_span),
            ("InterestIncomeExpense", direct),
            ("GainLossOnSaleOfAssets", direct),
            ("OtherNet", in_span),
            ("IncomeBeforeTax", in_span),
            ("IncomeAfterTax", in_span),
            ("MinorityInterest", direct),
            ("EquityInAffiliates", direct),
            ("NetIncomeBeforeExtraItems", in_span),
            ("AccountingChange", direct),
            ("DiscontinuedOperations", direct),
            ("ExtraordinaryItem", direct),
            ("NetIncome", in_span),
            ("PreferredDividends", direct),
            ("IncomeAvailabletoCommonExclExtraItems", in_span),
            ("IncomeAvailabletoCommonInclExtraItems", in_span),
            ("BasicWeightedAverageShares", direct),
            ("BasicEPSExcludingExtraordinaryItems", direct),
            ("BasicEPSIncludingExtraordinaryItems", direct),
            ("DilutionAdjustment", direct),
            ("DilutedWeightedAverageShares", direct),
            ("DilutedEPSExcludingExtraordinaryItems", in_span),
            ("DilutedEPSIncludingExtraordinaryItems", direct),
            ("DividendsperShareCommonStockPrimaryIssue", direct),
            ("GrossDividendsCommonStock", direct),
            ("NetIncomeAfterStockBasedCompExpense", direct),
            ("BasicEPSAfterStockBasedCompExpense", direct),
            ("DilutedEPSAfterStockBasedCompExpense", direct),
            ("DepreciationSupplemental", direct),
            ("TotalSpecialItems", direct),
            ("NormalizedIncomeBeforeTaxes", direct),
            ("EffectOfSpecialItemsOnIncomeTaxes", direct),
            ("IncomeTaxesExcludingImpactOfSpecialItems", direct),
            ("NormalizedIncomeAfterTaxes", direct),
            ("NormalizedIncomeAvailToCommon", direct),
            ("BasicNormalizedEPS", direct),
            ("DilutedNormalizedEPS", in_span),
        )

        for column, field in enumerate(("Currency", "TimePeriod"), 1):
            loader.add_xpath(
                field,
                '%s/thead/tr/th[%d]/text()' % (table, column),
                MapCompose(unicode.strip, unicode.title))

        for row, (field, leaf) in enumerate(body_rows, 1):
            loader.add_xpath(
                field,
                '%s/tbody/tr[%d]/td[2]%s' % (table, row, leaf),
                MapCompose(unicode.strip, unicode.title))

        return loader.load_item()
예제 #10
0
    def parse(self, response):
        """Load the ``fs-table`` under ``div#balannualdiv`` into a FinanceItem.

        The first two header cells fill ``Currency`` and ``TimePeriod``.
        Each remaining field is read from the second cell of one body row,
        rows 1 through 42 in order; three of them read the text of a
        ``<span>`` nested in that cell instead of the cell's own text.
        Every extracted string is passed through ``unicode.strip`` and then
        ``unicode.title``.

        Args:
            response: The response the ItemLoader extracts from.

        Returns:
            The item produced by ``ItemLoader.load_item()``.
        """
        loader = ItemLoader(item=FinanceItem(), response=response)
        table = '//div[@id="balannualdiv"]//*[@id="fs-table"]'

        # Body-row fields in row order (row number = position + 1).
        row_fields = (
            "CashAndEquivalents",
            "ShortTermInvestments",
            "CashAndShortTermInvestments",
            "AccountsReceivableTradeNet",
            "ReceivablesOther",
            "TotalReceivablesNet",
            "TotalInventory",
            "PrepaidExpenses",
            "OtherCurrentAssetsTotal",
            "TotalCurrentAssets",
            "PropertyPlantEquipmentTotalGross",
            "AccumulatedDepreciationTotal",
            "GoodwillNet",
            "IntangiblesNet",
            "LongTermInvestments",
            "OtherLongTermAssetsTotal",
            "TotalAssets",
            "AccountsPayable",
            "AccruedExpenses",
            "NotesPayableShortTermDebt",
            "CurrentPortofLTDebtCapitalLeases",
            "OtherCurrentliabilitiesTotal",
            "TotalCurrentLiabilities",
            "LongTermDebt",
            "CapitalLeaseObligations",
            "TotalLongTermDebt",
            "TotalDebt",
            "DeferredIncomeTax",
            "MinorityInterest",
            "OtherLiabilitiesTotal",
            "TotalLiabilities",
            "RedeemablePreferredStockTotal",
            "PreferredStockNonRedeemableNet",
            "CommonStockTotal",
            "AdditionalPaidInCapital",
            "RetainedEarningsAccumulatedDeficit",
            "TreasuryStockCommon",
            "OtherEquityTotal",
            "TotalEquity",
            "TotalLiabilitiesShareholdersEquity",
            "SharesOutsCommonStockPrimaryIssue",
            "TotalCommonSharesOutstanding",
        )
        # Fields whose value is taken from a <span> inside the cell.
        span_fields = frozenset((
            "AccumulatedDepreciationTotal",
            "TreasuryStockCommon",
            "OtherEquityTotal",
        ))

        for column, field in enumerate(("Currency", "TimePeriod"), 1):
            xpath = '{0}/thead/tr/th[{1}]/text()'.format(table, column)
            loader.add_xpath(field, xpath,
                             MapCompose(unicode.strip, unicode.title))

        for row, field in enumerate(row_fields, 1):
            if field in span_fields:
                leaf = '/span/text()'
            else:
                leaf = '/text()'
            xpath = '{0}/tbody/tr[{1}]/td[2]{2}'.format(table, row, leaf)
            loader.add_xpath(field, xpath,
                             MapCompose(unicode.strip, unicode.title))

        return loader.load_item()
예제 #11
0
    def parse(self, response):
        """Load the ``fs-table`` under ``div#casinterimdiv`` into a FinanceItem.

        The first two header cells fill ``Currency`` and ``TimePeriod``.
        Each remaining field is read from the second cell of one body row,
        rows 1 through 19 in order; rows flagged ``True`` below read the
        text of a ``<span>`` nested in that cell instead of the cell's own
        text. Every extracted string is passed through ``unicode.strip``
        and then ``unicode.title``.

        Args:
            response: The response the ItemLoader extracts from.

        Returns:
            The item produced by ``ItemLoader.load_item()``.
        """
        loader = ItemLoader(item=FinanceItem(), response=response)
        table = '//div[@id="casinterimdiv"]//*[@id="fs-table"]'

        # (field, value-is-inside-a-span) per body row, in row order.
        body_rows = [
            ("NetIncomeStartingLine", True),
            ("DepreciationDepreciation", False),
            ("Amortization", False),
            ("DeferredTaxes", True),
            ("NonCashItems", False),
            ("ChangesInWorkingCapital", False),
            ("CashFromOperatingActivities", False),
            ("CapitalExpenditures", True),
            ("OtherInvestingCashFlowItemsTotal", True),
            ("CashFromInvestingActivities", True),
            ("FinancingCashFlowItems", True),
            ("TotalCashDividendsPaid", False),
            ("IssuanceRetirementofStockNet", False),
            ("IssuanceRetitementOfDebtNet", False),
            ("CashFromFinancingActivities", True),
            ("ForeignExchangeEffects", False),
            ("NetChangeInCash", False),
            ("CashInterestPaidSupplemental", False),
            ("CashTaxesPaidSupplemental", False),
        ]

        # Collect every (field, xpath) pair first, then register them in order.
        targets = [
            ("Currency", table + '/thead/tr/th[1]/text()'),
            ("TimePeriod", table + '/thead/tr/th[2]/text()'),
        ]
        for row, (field, wrapped) in enumerate(body_rows, 1):
            leaf = '/span/text()' if wrapped else '/text()'
            targets.append(
                (field, table + '/tbody/tr[' + str(row) + ']/td[2]' + leaf))

        for field, xpath in targets:
            loader.add_xpath(field, xpath,
                             MapCompose(unicode.strip, unicode.title))

        return loader.load_item()
예제 #12
0
 def parse(self, response):
     """Yield one FinanceItem per data row found in the PDF in ``response``.

     The record date is the first ``(as of <d>/<d>/<d>)`` match found in
     the extracted page text, or ``None`` when no page contains one. The
     rows come from every table tabula detects in the same PDF: rows whose
     first cell is missing or equals ``'Putnam fund name'`` are skipped,
     and the remaining cells are split on whitespace with ``$`` and ``%``
     stripped from each piece.

     Args:
         response: Response whose ``body`` holds the raw PDF bytes.

     Yields:
         FinanceItem: One item per row, with the gain percentages set to
         0 when the per-share total is zero.

     Raises:
         ValueError: If a row's trailing values are not numeric.
         IndexError: If a row has too few values.
     """
     pdf = PyPDF2.PdfFileReader(io.BytesIO(response.body))

     # Raw string: the pattern contains backslash escapes meant for `re`.
     as_of_pattern = re.compile(r'\(as of (\d+/\d+/\d+)[ ]*\)')
     date_created = None
     for page in range(pdf.getNumPages()):
         match = as_of_pattern.search(pdf.getPage(page).extractText())
         if match:
             # Stop at the first page that carries the date.
             date_created = match.group(1)
             break

     output = []
     tables = tabula.read_pdf(io.BytesIO(response.body),
                              pages="all",
                              multiple_tables=True,
                              stream=True)
     for table in tables:
         for ix in table.index:
             output_row = []
             for n, cell in enumerate(table.loc[ix]):
                 if n == 0:
                     # No name in the first cell, or the header row: skip.
                     if pd.isna(cell) or cell == 'Putnam fund name':
                         break
                     output_row.append(cell)
                 elif not pd.isna(cell):
                     # Empty (NaN) cells contribute nothing. '%s' formatting
                     # lets numeric cells through the same split as text;
                     # one cell may hold several whitespace-separated values.
                     for s in ('%s' % (cell,)).split():
                         output_row.append(s.strip('$%'))
             if output_row:
                 output.append(output_row)

     for row in output:
         fund_name = row[0]
         if fund_name.startswith(':'):
             fund_name = fund_name[1:]

         per_share_total = float(row[-1])
         short_term_gain = float(row[-4])
         long_term_gain = float(row[-3])
         try:
             short_term_gain_pct = short_term_gain / per_share_total
             long_term_gain_pct = long_term_gain / per_share_total
         except ZeroDivisionError:
             # A zero total makes both ratios undefined; report them as 0.
             short_term_gain_pct = 0
             long_term_gain_pct = 0

         item = FinanceItem()
         item['firm_name'] = 'Putnam'
         item['fund_name'] = fund_name
         item['ex_date'] = row[1]
         item['pay_date'] = row[2]
         item['short_term_gain'] = short_term_gain
         item['short_term_gain_pct'] = short_term_gain_pct
         item['long_term_gain'] = long_term_gain
         item['long_term_gain_pct'] = long_term_gain_pct
         item['record_date'] = date_created
         item['source_url'] = response.url
         yield item