コード例 #1
0
 def __init__(self, input_file, output_file):
     """Create the helper objects and remember the input/output arguments.

     :param input_file: stored unchanged on ``self.input_file``.
     :param output_file: stored unchanged on ``self.output_file``.
     """
     # Helpers are created in the same order as before: logger, spider,
     # regex, utils.
     self.logger = LogManager(__name__)
     self.spider = Spider()
     self.regex = Regex()
     self.utils = Utils()
     # Caller-supplied values are kept as-is.
     self.input_file, self.output_file = input_file, output_file
コード例 #2
0
 def __init__(self, spider, memberList, subject, message):
     """Initialise the thread with a caller-supplied spider and message data.

     :param spider: stored unchanged on ``self.spider`` (not created here).
     :param memberList: stored unchanged on ``self.memberList``.
     :param subject: converted with ``unicode()`` before being stored.
     :param message: converted with ``unicode()`` before being stored.
     """
     QThread.__init__(self)
     self.spider = spider
     self.regex = Regex()
     self.memberList = memberList
     # Both text fields are normalised to unicode objects.
     self.subject, self.message = unicode(subject), unicode(message)
コード例 #3
0
 def __init__(self):
     """Create the web view, enable its web settings and hook up PDF conversion.

     ``self.convertToPdf`` is connected to the view's ``loadFinished`` signal.
     """
     QObject.__init__(self)
     self.regex = Regex()
     self.title = ''
     self.webView = QWebView()
     # Every attribute listed here is switched on, in this order.
     enabledAttributes = (
         QWebSettings.AutoLoadImages,
         QWebSettings.JavascriptEnabled,
         QWebSettings.PluginsEnabled,
         QWebSettings.DeveloperExtrasEnabled,
     )
     for attribute in enabledAttributes:
         self.webView.settings().setAttribute(attribute, True)
     self.pdfPrinter = QPrinter()
     self.webView.loadFinished.connect(self.convertToPdf)
コード例 #4
0
 def __init__(self, filename):
     """Create the helpers, store ``filename`` and emit the CSV header row.

     :param filename: stored unchanged on ``self.filename``.

     A ``Csv`` object is created for ``'topsy.csv'`` and the header row is
     passed to its ``writeCsvRow`` immediately.
     """
     self.logger = LogManager(__name__)
     self.spider = Spider()
     self.regex = Regex()
     self.utils = Utils()
     self.filename, self.url = filename, 'http://topsy.com/s?'
     self.csvWriter = Csv('topsy.csv')
     # Column titles are written once, at construction time.
     self.csvWriter.writeCsvRow([
         'Keyword',
         'Tweets in last 30 days',
         'Topsy Sentiment Score',
         ' Date of scrape',
     ])
コード例 #5
0
 def __init__(self):
     """Create the helper objects and define the product CSV header."""
     self.logger = LogManager(__name__)
     self.spider = Spider()
     self.browser = BrowserUtil()
     self.regex = Regex()
     self.utils = Utils()
     # The header is assembled from thematic groups; the resulting list has
     # the same 17 columns in the same order as a single literal would.
     categoryColumns = ['Category', 'Sub Category 1', 'Sub Category 2']
     productColumns = [
         'Product Code', 'Product Name', 'Product ShortName',
         'Product Description',
     ]
     priceColumns = ['List Price', 'Vendor Price', 'Availability']
     techColumns = ['Power', 'Size', 'KW', 'Weight(kg)', 'Other Tech']
     fileColumns = ['Pdf File', 'Image File']
     self.csvHeader = (categoryColumns + productColumns + priceColumns +
                       techColumns + fileColumns)
     self.totalProducts = 0
コード例 #6
0
 def __init__(self, filename):
     """Create the helpers, store ``filename`` and emit the CSV header row.

     :param filename: stored unchanged on ``self.filename``.

     A ``Csv`` object is created for ``'google_finance.csv'`` and the header
     row is passed to its ``writeCsvRow`` immediately.
     """
     self.logger = LogManager(__name__)
     self.spider = Spider()
     self.regex = Regex()
     self.utils = Utils()
     self.filename = filename
     self.url, self.main_url = ('https://www.google.com/finance?',
                                'https://www.google.com')
     self.csvWriter = Csv('google_finance.csv')
     # Column titles are written once, at construction time.
     self.csvWriter.writeCsvRow([
         'Ticker Symbol',
         'Quarter End',
         'Revenue',
         'Total Revenue',
         'Date of Scrape',
     ])
コード例 #7
0
 def __init__(self, spider, url, pageRange=None):
     """Initialise the thread and derive the page bounds from ``pageRange``.

     :param spider: stored unchanged on ``self.spider`` (not created here).
     :param url: stored unchanged on ``self.url``.
     :param pageRange: optional page selection. Its stripped ``str()`` form
         is tested against ``<digits>-<digits>`` first, then against a
         single run of digits. If neither pattern is found, ``startPage``
         and ``endPage`` stay ``None``.
     """
     QThread.__init__(self)
     self.spider = spider
     self.regex = Regex()
     self.url = url
     self.startPage = None
     self.endPage = None

     # Normalise once instead of re-computing for every regex call.
     pageRangeText = str(pageRange).strip()
     # Raw strings keep ``\d`` a regex escape rather than an (invalid)
     # string escape; the pattern values themselves are unchanged.
     rangePattern = r'(?i)(\d+)-(\d+)'
     singlePattern = r'(?i)(\d+)'

     if self.regex.isFoundPattern(rangePattern, pageRangeText):
         pageRangeFormat = self.regex.getSearchedDataGroups(rangePattern, pageRangeText)
         self.startPage = int(pageRangeFormat.group(1))
         self.endPage = int(pageRangeFormat.group(2))
     elif self.regex.isFoundPattern(singlePattern, pageRangeText):
         pageRangeFormat = self.regex.getSearchedDataGroups(singlePattern, pageRangeText)
         # A single number selects exactly one page.
         self.startPage = int(pageRangeFormat.group(1))
         self.endPage = self.startPage
コード例 #8
0
 def __init__(self):
     """Create the helpers, load existing CSV data and emit the header if absent.

     Data is read from ``'paodeacucar.csv'`` via ``readCsvRow(..., 4)`` and
     kept on ``self.dupCsvRows``. When the string ``'URL'`` is not in that
     data, the header row is appended to it and written through
     ``self.csvWriter``.
     """
     QThread.__init__(self)
     self.logger = LogManager(__name__)
     self.spider = Spider()
     self.regex = Regex()
     self.utils = Utils()
     self.mainUrl = self.url = 'http://www.paodeacucar.com.br/'
     # Presumably the second argument selects column 4 (the 'URL' column) -
     # TODO confirm against Csv.readCsvRow.
     self.dupCsvRows = Csv().readCsvRow('paodeacucar.csv', 4)
     self.csvWriter = Csv('paodeacucar.csv')
     headerRow = [
         'SKU', 'Category', 'Subcategory', 'Name', 'URL', 'URL Image',
         'Details', 'Nutrients Table html code', 'Price from, 28/abr/14',
         '28/abr/14',
     ]
     if 'URL' in self.dupCsvRows:
         # Header already present; nothing to write.
         return
     # NOTE(review): the check above looks for the string 'URL' while the
     # whole header list is appended here - verify this is intended.
     self.dupCsvRows.append(headerRow)
     self.csvWriter.writeCsvRow(headerRow)
コード例 #9
0
ファイル: MainWindow.py プロジェクト: rabbicse/py-web2pdf
 def __init__(self, parent=None):
     """Initialise the form state for both the A and B sides and build the UI.

     :param parent: forwarded to the base-class initialiser.
     """
     super(MainForm, self).__init__(parent)
     self.regex = Regex()
     # Paired A/B (or unsuffixed/B) fields start from the same immutable
     # value, so they are initialised together.
     self.alreadyClickedA = self.alreadyClickedB = False
     self.fileDir = self.fileDirB = None
     self.fileName = self.fileNameB = None
     self.totalUrlA = self.totalUrlB = 0
     self.currentUrlA = self.currentUrlB = 0
     self.pdfCounter = self.pdfCounterB = 1
     self.typeName = 'B'
     # State must exist before the widgets are created.
     self.setupUI()
コード例 #10
0
 def __init__(self):
     """Set up search state, helpers and the CSV writer; emit the header if absent.

     Data is read from ``'env_clearance.csv'`` via ``readCsvRow`` and kept on
     ``self.dupCsvRows``. When ``self.csvDataHeader`` is not in that data,
     the header is written through ``self.csvWriter`` and appended to it.
     """
     self.browser = None
     self.url = "http://environmentclearance.nic.in/Search.aspx"
     # Four independent empty lists (deliberately not a chained assignment,
     # which would make them share one list object).
     self.statuses, self.categories, self.years, self.states = [], [], [], []
     self.csvDataHeader = [
         'Status',
         'Category',
         'Year',
         'State',
         'Serial No',
         'Proposal details',
         'Location',
         'Important Date',
         'Category',
         'Company Proponent',
     ]
     self.logger = LogManager(__name__)
     self.regex = Regex()
     self.dupCsvRows = Csv().readCsvRow('env_clearance.csv')
     self.csvWriter = Csv('env_clearance.csv')
     if self.csvDataHeader in self.dupCsvRows:
         # Header already present; nothing to write.
         return
     self.csvWriter.writeCsvRow(self.csvDataHeader)
     self.dupCsvRows.append(self.csvDataHeader)
コード例 #11
0
    def downloadFile(self, url, downloadPath, proxyHandler=None):
        """Download ``url`` to ``downloadPath``, printing progress per chunk.

        :param url: address to fetch.
        :param downloadPath: destination file; a missing parent directory
            is created first.
        :param proxyHandler: optional urllib2 handler added to the opener.
        :return: ``True`` when at least ``Content-Length`` bytes were
            written, ``False`` when the stream ended early or any exception
            was caught (the exception is printed, not re-raised).
        """
        try:
            regex = Regex()
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                          urllib2.HTTPHandler(debuglevel=0),
                                          urllib2.HTTPSHandler(debuglevel=0))
            opener.addheaders = [
                config.USER_AGENT,
                ('Accept',
                 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
                 ), ('Connection', 'keep-alive')
            ]

            if proxyHandler is not None:
                opener.add_handler(proxyHandler)
            # Use the opener configured above; urllib2.urlopen() would
            # ignore its headers and the proxy handler.
            resp = opener.open(url, timeout=30)
            try:
                contentLength = resp.info()['Content-Length']
                contentLength = regex.getSearchedData(r'(?i)^(\d+)', contentLength)
                totalSize = float(contentLength)

                directory = os.path.dirname(downloadPath)
                # dirname is '' for a bare file name and makedirs('') raises.
                if directory and not os.path.exists(directory):
                    os.makedirs(directory)

                currentSize = 0
                CHUNK_SIZE = 32768
                # ``with`` closes the file on early EOF and on errors too.
                with open(downloadPath, 'wb') as dl_file:
                    while True:
                        data = resp.read(CHUNK_SIZE)
                        if not data:
                            break
                        currentSize += len(data)
                        dl_file.write(data)

                        print('============> ' +
                              str(round(float(currentSize * 100) / totalSize, 2)) +
                              '% of ' + str(totalSize) + ' bytes')
                        if currentSize >= totalSize:
                            break
                # A short read (stream ended before Content-Length) is a failure.
                return currentSize >= totalSize
            finally:
                resp.close()
        except Exception as x:
            print(x)
            return False
コード例 #12
0
    def __init__(self):
        """Create the helpers, load existing CSV data and emit the header if absent.

        Data is read from ``'nisbets.csv'`` via ``readCsvRow(..., 0)`` and
        kept on ``self.dupCsvRows``. When the string ``'URL'`` is not in
        that data, the header row is written through ``self.csvWriter`` and
        ``'URL'`` is appended to ``self.dupCsvRows``.
        """
        QtCore.QThread.__init__(self)
        self.isExiting = False
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow('nisbets.csv', 0)
        self.csvWriter = Csv('nisbets.csv')
        self.mainUrl = 'http://www.nisbets.co.uk'
        csvHeaderList = [
            'URL', 'Product Code', 'Product Technical Specifications',
            'Product Name', 'Brand', 'Product Price',
            'Product Short Description', 'Product Long Description',
            'Image File Name', 'User Manual File Name',
            'Exploded View File Name', 'Spares Code', 'Accessories',
            # The comma after 'Product Status' is required: without it the
            # adjacent literals merge into 'Product StatusCategory1' and the
            # header ends up one column short.
            'Product Status',
            'Category1', 'Category2', 'Category3', 'Category4'
        ]
        if 'URL' not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvHeaderList)
            # Record the first header cell ('URL') so the check above sees it.
            self.dupCsvRows.append(csvHeaderList[0])

        self.utils = Utils()
コード例 #13
0
 def __init__(self):
     """Create the instance with a fresh ``Regex`` helper on ``self.regex``."""
     self.regex = Regex()
コード例 #14
0
 def __init__(self, username, password):
     """Initialise the thread with its helpers and the given credentials.

     :param username: stored unchanged on ``self.username``.
     :param password: stored unchanged on ``self.password``.
     """
     QThread.__init__(self)
     self.spider = Spider()
     self.regex = Regex()
     # Credentials are kept exactly as supplied by the caller.
     self.username, self.password = username, password