Esempio n. 1
0
    def download_oneday(self, relpath, dateobj):
        """Download every gazette listed in the search results for one date.

        The date, formatted by utils.dateobj_to_str() with '-' as separator,
        is substituted twice into self.searchurl. The result page is fetched
        and parsed, and every entry from parse_results() that carries a
        'download' key is handed to download_gazette().

        Returns the list of truthy values returned by download_gazette()
        (presumably relative URLs of the saved gazettes). The list is empty
        when the search page could not be downloaded or parsed.
        """
        dls = []
        datestr = utils.dateobj_to_str(dateobj, '-')
        searchurl = self.searchurl % (datestr, datestr)

        response = self.download_url(searchurl)
        if not response or not response.webpage:
            # Logger.warn() is only a deprecated alias of warning().
            self.logger.warning('Could not download search result for date %s',
                                dateobj)
            return dls

        d = utils.parse_webpage(response.webpage, self.parser)
        if not d:
            self.logger.warning('Could not parse search result for date %s',
                                dateobj)
            return dls

        for metainfo in self.parse_results(d, dateobj):
            if 'download' not in metainfo:
                self.logger.warning('No link. Ignoring metainfo: %s', metainfo)
                continue

            # The search URL is passed along with each download request.
            relurl = self.download_gazette(metainfo, searchurl, relpath)
            if relurl:
                dls.append(relurl)
        return dls
Esempio n. 2
0
    def get_date_url(self, dateobj):
        """Return the dojqry.asp query URL for a single date.

        The same date fills both parameter groups (datef/selfday/selfmonth/
        selfyear and datet/seltday/seltmonth/seltyear). Values are placed in
        the query string as-is, without URL-encoding.
        """
        datestr = utils.dateobj_to_str(dateobj, '-', reverse = True)
        day = utils.pad_zero(dateobj.day)
        month = utils.pad_zero(dateobj.month)
        year = utils.pad_zero(dateobj.year)

        qs = [
            ('datef', datestr),
            ('datet', datestr),
            ('selfday', day),
            ('selfmonth', month),
            ('selfyear', year),
            ('seltday', day),
            ('seltmonth', month),
            ('seltyear', year),
            ('B1', 'Search'),
        ]
        # str.join replaces the deprecated string.join(), which is gone in
        # Python 3.
        query = '&'.join('%s=%s' % (key, value) for key, value in qs)

        return self.courturl + 'dojqry.asp' + '?' + query
Esempio n. 3
0
    def get_date_url(self, dateobj):
        """Return the dojqry.asp query URL for a single date.

        The same date fills both parameter groups (datef/selfday/selfmonth/
        selfyear and datet/seltday/seltmonth/seltyear). Values are placed in
        the query string as-is, without URL-encoding.
        """
        datestr = utils.dateobj_to_str(dateobj, '-', reverse = True)
        day = utils.pad_zero(dateobj.day)
        month = utils.pad_zero(dateobj.month)
        year = utils.pad_zero(dateobj.year)

        qs = [
            ('datef', datestr),
            ('datet', datestr),
            ('selfday', day),
            ('selfmonth', month),
            ('selfyear', year),
            ('seltday', day),
            ('seltmonth', month),
            ('seltyear', year),
            ('B1', 'Search'),
        ]
        # str.join replaces the deprecated string.join(), which is gone in
        # Python 3.
        query = '&'.join('%s=%s' % (key, value) for key, value in qs)

        return self.courturl + 'dojqry.asp' + '?' + query
Esempio n. 4
0
    def get_post_data(self, tags, dateobj):
        """Build the (name, value) pairs to POST for a one-day search.

        tags is iterated for 'input' and 'select' elements. Image inputs and
        the reset button are dropped, both date boxes receive the date
        formatted with '/' as separator, the search button is sent as
        'Search', and the division/section selects get their placeholder
        values. A field without a value is sent as an empty string.
        """
        search_date = utils.dateobj_to_str(dateobj, '/')
        date_fields = ('ctl00$CPH$txtToDate', 'ctl00$CPH$txtfromDate')
        select_values = {
            'ctl00$CPH$ddldivision': '-----Select----',
            'ctl00$CPH$ddlSection': '-----Select-----',
        }

        pairs = []
        for tag in tags:
            field = None
            content = None

            if tag.name == 'input':
                field = tag.get('name')
                content = tag.get('value')
                if tag.get('type') == 'image' or field == 'ctl00$CPH$btnReset':
                    continue

                if field in date_fields:
                    content = search_date
                elif field == 'ctl00$CPH$btnSearch':
                    content = 'Search'
            elif tag.name == 'select':
                field = tag.get('name')
                # Selects not listed above fall through with no value.
                content = select_values.get(field)

            if not field:
                continue
            pairs.append((field, u'' if content is None else content))
        return pairs
Esempio n. 5
0
    def download_oneday(self, relpath, dateobj):
        """Process the 'browse' listing of entries created on one date.

        Builds the browse URL (type=datecreated, ascending order, rpp=20, the
        date formatted with '-' and reverse=True) and returns whatever
        result_page() returns for it, starting from an empty dict.
        """
        params = (
            ('type', 'datecreated'),
            ('order', 'ASC'),
            ('rpp', '20'),
            ('value', utils.dateobj_to_str(dateobj, '-', reverse = True)),
        )
        query = '&'.join('%s=%s' % pair for pair in params)
        listing_url = self.courturl + 'browse?' + query

        return self.result_page(relpath, listing_url, dateobj, {})
Esempio n. 6
0
    def download_oneday(self, relpath, dateobj):
        """Process the 'browse' listing of entries created on one date.

        Builds the browse URL (type=datecreated, ascending order, rpp=20, the
        date formatted with '-' and reverse=True) and returns whatever
        result_page() returns for it, starting from an empty dict.
        """
        params = (
            ('type', 'datecreated'),
            ('order', 'ASC'),
            ('rpp', '20'),
            ('value', utils.dateobj_to_str(dateobj, '-', reverse = True)),
        )
        query = '&'.join('%s=%s' % pair for pair in params)
        listing_url = self.courturl + 'browse?' + query

        return self.result_page(relpath, listing_url, dateobj, {})
Esempio n. 7
0
    def download_oneday(self, relpath, dateobj):
        """Process the 'browse' listing of entries created on one date.

        The query asks for type=datecreated in ascending order with rpp=20
        and the date formatted with "-" and reverse=True. The return value is
        that of result_page(), which is given an empty dict to start from.
        """
        created_on = utils.dateobj_to_str(dateobj, "-", reverse=True)

        query_parts = []
        for key, val in (
            ("type", "datecreated"),
            ("order", "ASC"),
            ("rpp", "20"),
            ("value", created_on),
        ):
            query_parts.append("%s=%s" % (key, val))

        url = self.courturl + "browse?" + "&".join(query_parts)
        return self.result_page(relpath, url, dateobj, {})
Esempio n. 8
0
    def get_post_data(self, dateobj):
        """Return the form fields for a gazette search limited to one date.

        dateobj, formatted with an empty separator, is used as both
        property(fromdate) and property(todate). Today's date, formatted with
        '/', is sent as property(hdnCurDate).
        """
        today = utils.dateobj_to_str(datetime.date.today(), '/')
        search_date = utils.dateobj_to_str(dateobj, '')

        return [
            ('displaytable_length', '-1'),
            ('mode', 'unspecified'),
            ('property(abstract)', ''),
            ('property(docid)', ''),
            ('property(fromdate)', search_date),
            ('property(gazetteno)', ''),
            ('property(hdnCurDate)', today),
            ('property(jobno)', ''),
            ('property(month1)', '0'),
            ('property(search)', 'searchGazette'),
            ('property(searchmode)', 'date'),
            ('property(todate)', search_date),
            ('property(year)', '0'),
            ('property(year1)', '0'),
        ]
Esempio n. 9
0
 def get_post_data(self, dateobj):
     """Return the form fields for a search restricted to one date.

     The date, formatted with '/' as separator, is sent as both txtFrom and
     txtTo; every other field carries a fixed value.
     """
     search_date = utils.dateobj_to_str(dateobj, '/')
     return [
         ('cmb_Cat', '-1'),
         ('cmb_Name', '-1'),
         ('cmb_Not_For', '-1'),
         ('ComboDept', '-1'),
         ('eAttachId', ''),
         ('freetextradio', 'No'),
         ('NewSearchFlag', 'false'),
         ('PriorityName', '--Select--'),
         ('refDocId', ''),
         ('reportEndIndex', '10'),
         ('reportStartIndex', '1'),
         ('txtEmail', ''),
         ('txtFreeText', ''),
         ('txtFrom', search_date),
         ('txtGazetteNo', ''),
         ('txtNotNo', ''),
         ('txtNotTitle', ''),
         ('txtTo', search_date),
     ]
Esempio n. 10
0
 def get_query_tuples(self, dateobj, zone, city):
     """Return the query (name, value) pairs for a search by order date.

     zone selects the report design file name, city is sent as 'City', and
     the date, formatted with '/' as separator, is sent as 'Order Date'.
     """
     order_date = utils.dateobj_to_str(dateobj, '/')
     report = 'pronouncementOrderReport1_%s.rptdesign' % zone

     return [
         # 'showReoprt' is sent exactly as spelled here.
         ('subAction', 'showReoprt'),
         ('__report', report),
         ('City', city),
         ('searchWhat', 'searchByDate'),
         ('Serial No', ''),
         ('Appeal No', ''),
         ('Assessee Name', ''),
         ('AssType', 'null'),
         ('Order Date', order_date),
         ('Member Name', ''),
         ('Pronouncement Date', ''),
     ]
Esempio n. 11
0
 def get_query_tuples(self, dateobj, zone, city):
     """Return the query (name, value) pairs for a search by order date.

     zone selects the report design file name, city is sent as 'City', and
     the date, formatted with '/' as separator, is sent as 'Order Date'.
     """
     order_date = utils.dateobj_to_str(dateobj, '/')
     report = 'pronouncementOrderReport1_%s.rptdesign' % zone

     return [
         # 'showReoprt' is sent exactly as spelled here.
         ('subAction', 'showReoprt'),
         ('__report', report),
         ('City', city),
         ('searchWhat', 'searchByDate'),
         ('Serial No', ''),
         ('Appeal No', ''),
         ('Assessee Name', ''),
         ('AssType', 'null'),
         ('Order Date', order_date),
         ('Member Name', ''),
         ('Pronouncement Date', ''),
     ]
Esempio n. 12
0
    def download_oneday(self, relpath, dateobj):
        """Download the orders of every case listed for one date.

        Posts the date (as both fromdate and todate) to self.pageurl, takes
        the first run of digits from each result row's onclick attribute,
        posts it as 'ccin' to self.caseurl and hands the returned page to
        download_orders().

        Returns the combined items of all download_orders() results; the list
        is empty when the listing page is missing or cannot be parsed.
        """
        collected = []

        day = utils.dateobj_to_str(dateobj, '/')
        # Everything before the last '/' of relpath ('' when it has none).
        parent_relpath = relpath.rpartition('/')[0]

        form = [
            ('hcjudgecode', ''),
            ('fromdate', day),
            ('todate', day),
            ('counter', '1'),
        ]
        listing = self.download_url(self.pageurl, referer = self.baseurl,
                                    loadcookies = self.cookiefile.name,
                                    postdata = form)
        if not listing:
            self.logger.warning(u'No webpage for %s' % self.pageurl)
            return collected

        parsed = utils.parse_webpage(listing)
        if not parsed:
            self.logger.error(u'Could not parse html of the result page for date %s' % dateobj)
            return collected

        for row in parsed.findAll('tr'):
            # Rows that contain a <th> cell are skipped.
            if row.find('th'):
                continue

            handler = row.get('onclick')
            if not handler:
                self.logger.info(u'No onclick in %s' % row)
                continue

            digits = re.search(r'\d+', handler)
            if digits is None:
                continue
            ccin = digits.group(0)

            casepage = self.download_url(self.caseurl, referer = self.baseurl,
                                         loadcookies = self.cookiefile.name,
                                         postdata = [('ccin', ccin)])
            if not casepage:
                self.logger.error(u'Could not get case for %s on date %s' % (ccin, dateobj))
                continue

            collected.extend(
                self.download_orders(parent_relpath, ccin, dateobj, casepage))
        return collected
Esempio n. 13
0
    def get_post_data(self, tags, dateobj):
        """Build the (name, value) pairs to POST for a one-day search.

        Image inputs, the gazette-number box, the cancel button and the year
        select are left out. Only the first TYPE input is sent, with the
        value 'RadioButton1'. Both text boxes receive the date formatted with
        '/' as separator. A field without a value is sent as an empty string.
        """
        search_date = utils.dateobj_to_str(dateobj, '/')
        skipped_inputs = (
            'ctl00$ContentPlaceHolder1$TxtGazetteNo',
            'ctl00$ContentPlaceHolder1$BtnCancel',
        )
        date_inputs = (
            'ctl00$ContentPlaceHolder1$TextBox2',
            'ctl00$ContentPlaceHolder1$TextBox1',
        )

        pairs = []
        type_sent = False
        for tag in tags:
            field = None
            content = None

            if tag.name == 'input':
                field = tag.get('name')
                content = tag.get('value')
                if tag.get('type') == 'image' or field in skipped_inputs:
                    continue

                if field == 'ctl00$ContentPlaceHolder1$TYPE':
                    # Later TYPE inputs are dropped once one has been sent.
                    if type_sent:
                        continue
                    content = 'RadioButton1'
                    type_sent = True
                elif field == 'ctl00$ContentPlaceHolder1$BtnSearch':
                    content = 'Search'
                elif field == 'ctl00$ContentPlaceHolder1$CheckBoxYearAll':
                    content = 'on'
                elif field in date_inputs:
                    content = search_date
            elif tag.name == 'select':
                field = tag.get('name')
                if field == 'ctl00$ContentPlaceHolder1$ddlYear':
                    continue
                if field == 'ctl00$ContentPlaceHolder1$ddlFilter':
                    content = '1'

            if not field:
                continue
            pairs.append((field, u'' if content is None else content))

        return pairs
Esempio n. 14
0
    def get_post_data(self, dateobj):
        """Return the form fields for a search limited to one date.

        The date, formatted with an empty separator, is used as both
        property(fromdate) and property(todate); all other fields are fixed.
        """
        search_date = utils.dateobj_to_str(dateobj, '')

        return [
            ('mode', 'unspecified'),
            ('property(abstract)', ''),
            ('property(department)', '0'),
            ('property(docid)', ''),
            ('property(fromdate)', search_date),
            ('property(gazetteno)', ''),
            ('property(gazettePart)', '0'),
            ('property(gazetteType)', '0'),
            ('property(month1)', '0'),
            ('property(search)', 'search'),
            ('property(todate)', search_date),
            ('property(year1)', '0'),
        ]
Esempio n. 15
0
    def date_postdata(self, dateobj):
        """Return the POST fields for a search restricted to one date.

        The list starts with empty __EVENTTARGET/__EVENTARGUMENT fields,
        continues with whatever state_data() yields, then the fixed search
        controls, and ends with the date (formatted with '-') as both
        ddldatefrom and ddldateto.
        """
        day = utils.dateobj_to_str(dateobj, '-')

        fields = [('__EVENTTARGET', ''), ('__EVENTARGUMENT', '')]
        fields.extend(self.state_data())
        fields.extend([
            ('ctl00$ContPlaceHolderMain$TextBox1', ''),
            ('ctl00$ContPlaceHolderMain$search1', 'search'),
            ('ctl00$ContPlaceHolderMain$btn', 'allwordbtn'),
            ('ctl00$ContPlaceHolderMain$btn1', 'textbtn'),
            ('ctl00$ContPlaceHolderMain$ddlmember',
             '--- Select Member Name ---'),
            ('ctl00$ContPlaceHolderMain$ddldebtype',
             '--- Select Debate Type ---'),
            ('ctl00$ContPlaceHolderMain$ddlsession',
             '--- Select Session ---'),
            ('ctl00$ContPlaceHolderMain$ddldatefrom', day),
            ('ctl00$ContPlaceHolderMain$ddldateto', day),
        ])
        return fields
Esempio n. 16
0
    def get_post_data(self, tags, dateobj):
        """Build the (name, value) pairs to POST for a one-day search.

        Image inputs and BtnElectronicGazette are left out. Only the first
        RBLanguage input is sent, with the value 'Both'. Both date pickers
        receive the date formatted with '/' and reverse=False. A field
        without a value is sent as an empty string.
        """
        search_date = utils.dateobj_to_str(dateobj, '/', reverse=False)
        date_inputs = ('GMDatePicker1$ctl00', 'GMGazzetteDate$ctl00')
        select_values = {
            'BtnSearch': 'Search',
            'DDListCategory': '',
            'DDListDepartment': '',
        }

        pairs = []
        language_sent = False
        for tag in tags:
            field = None
            content = None

            if tag.name == 'input':
                field = tag.get('name')
                content = tag.get('value')
                if tag.get('type') == 'image' or field == 'BtnElectronicGazette':
                    continue

                if field == 'RBLanguage':
                    # Later RBLanguage inputs are dropped once one is sent.
                    if language_sent:
                        continue
                    content = 'Both'
                    language_sent = True
                elif field in date_inputs:
                    content = search_date
            elif tag.name == 'select':
                field = tag.get('name')
                content = select_values.get(field)

            if not field:
                continue
            pairs.append((field, u'' if content is None else content))
        return pairs
Esempio n. 17
0
    def get_post_data(self, tags, dateobj):
        """Build the (name, value) pairs to POST for a one-day search.

        Image inputs and the archiveNotification input are left out. Only the
        first RadioButtonList1 input is sent, with the value '-1'. Both date
        boxes receive the date formatted with '-' and reverse=True, and
        Button1 is sent as 'Submit'. A field without a value is sent as an
        empty string.
        """
        search_date = utils.dateobj_to_str(dateobj, '-', reverse=True)
        radio_name = 'ctl00$ContentPlaceHolder1$RadioButtonList1'
        date_inputs = (
            'ctl00$ContentPlaceHolder1$txtstartdate',
            'ctl00$ContentPlaceHolder1$txtenddate',
        )
        select_values = {
            'BtnSearch': 'search',
            'ctl00$ContentPlaceHolder1$ddlGazetteCat': '-1',
            'ctl00$ContentPlaceHolder1$ddldepartment': '-1',
        }

        pairs = []
        radio_sent = False
        for tag in tags:
            field = None
            content = None

            if tag.name == 'input':
                field = tag.get('name')
                content = tag.get('value')
                if tag.get('type') == 'image' or \
                        field == 'ctl00$ContentPlaceHolder1$archiveNotification':
                    continue

                if field == radio_name:
                    # Later radio inputs are dropped once one has been sent.
                    if radio_sent:
                        continue
                    content = '-1'
                    radio_sent = True
                elif field in date_inputs:
                    content = search_date
                elif field == 'ctl00$ContentPlaceHolder1$Button1':
                    content = 'Submit'
            elif tag.name == 'select':
                field = tag.get('name')
                content = select_values.get(field)

            if not field:
                continue
            pairs.append((field, u'' if content is None else content))
        return pairs
Esempio n. 18
0
    def download_oneday(self, relpath, dateobj):
        """Query the order listing for one date once per side flag.

        After get_cookies(), the same form is posted to
        ordqryrepact_action.php for each of the flags C, CR, OS, NC, NR, AC
        and AR, with the date (formatted with '-') as both frmdate and
        todate. Every response goes through result_page(), which receives
        the same dict on each call.

        Returns the combined items of all result_page() results.
        """
        self.get_cookies()
        action_url = self.baseurl + '/ordqryrepact_action.php'
        day = utils.dateobj_to_str(dateobj, '-')

        # The side flag is sent as the third field, between these two groups.
        leading = [('pageno', 1), ('frmaction', '')]
        trailing = [('actcode', 0), ('frmdate', day),
                    ('todate', day), ('submit1', 'Submit')]

        downloaded = []
        shared_links = {}
        for flag in ('C', 'CR', 'OS', 'NC', 'NR', 'AC', 'AR'):
            form = leading + [('m_sideflg', flag)] + trailing
            page = self.download_url(action_url, postdata = form,
                                     loadcookies = self.cookiefile.name)
            downloaded.extend(
                self.result_page(page, relpath, dateobj, shared_links))

        return downloaded
Esempio n. 19
0
    def get_post_data(self, tags, dateobj):
        """Build the (name, value) pairs to POST for a one-day search.

        Image inputs, the cancel button, the year select and any TYPE input
        whose value is not 'RadioButton1' are left out. Both text boxes
        receive the date formatted with '/' as separator, CheckBoxYearAll is
        sent as 'on' and ddlFilter as '1'. A field without a value is sent as
        an empty string.
        """
        search_date = utils.dateobj_to_str(dateobj, '/')
        date_inputs = (
            'ctl00$ContentPlaceHolder1$TextBox1',
            'ctl00$ContentPlaceHolder1$TextBox2',
        )

        pairs = []
        for tag in tags:
            field = None
            content = None

            if tag.name == 'input':
                field = tag.get('name')
                content = tag.get('value')
                if tag.get('type') == 'image':
                    continue

                if field == 'ctl00$ContentPlaceHolder1$TYPE' \
                        and content != 'RadioButton1':
                    continue
                if field == 'ctl00$ContentPlaceHolder1$BtnCancel':
                    continue

                if field in date_inputs:
                    content = search_date
                elif field == 'ctl00$ContentPlaceHolder1$CheckBoxYearAll':
                    content = 'on'
            elif tag.name == 'select':
                field = tag.get('name')
                if field == 'ctl00$ContentPlaceHolder1$ddlYear':
                    continue
                if field == 'ctl00$ContentPlaceHolder1$ddlFilter':
                    content = '1'

            if not field:
                continue
            pairs.append((field, u'' if content is None else content))
        return pairs
Esempio n. 20
0
    def get_post_data(self, tags, dateobj):
        """Build the (name, value) pairs to POST for a one-day search.

        Image inputs, Button1 and Button2 are left out. Both date boxes
        receive the date formatted with an empty separator, the free-text
        inputs are blanked, and the listed selects get fixed values. A field
        without a value is sent as an empty string.
        """
        search_date = utils.dateobj_to_str(dateobj, '')
        skipped_inputs = ('Button2', 'Button1')
        date_inputs = ('txttodate', 'txtfrmdate')
        blank_inputs = ('jobno', 'txtGoNo', 'txtSearchText')
        select_values = {
            'BtnSearch': 'search',
            'DDLDeptname': 'Select',
            'DDLGoType': 'Select',
            'DropDownList1': 'Select',
        }

        pairs = []
        for tag in tags:
            field = None
            content = None

            if tag.name == 'input':
                field = tag.get('name')
                content = tag.get('value')
                if tag.get('type') == 'image' or field in skipped_inputs:
                    continue

                if field in date_inputs:
                    content = search_date
                elif field in blank_inputs:
                    content = ''
            elif tag.name == 'select':
                field = tag.get('name')
                content = select_values.get(field)

            if not field:
                continue
            pairs.append((field, u'' if content is None else content))

        return pairs
Esempio n. 21
0
    def get_post_data(self, tags, dateobj):
        """Build the (name, value) pairs to POST for a one-day search.

        Image inputs and the exit button are left out. The show button's own
        value is sent utf8-encoded, both date inputs receive the date
        formatted with '/' as separator, ddlType is set to self.gztype and
        ddldepart to '0'. A field without a value is sent as an empty string.
        """
        search_date = utils.dateobj_to_str(dateobj, '/')
        date_inputs = (
            'ctl00$ContentPlaceHolder2$DaintyDate2',
            'ctl00$ContentPlaceHolder2$DaintyDate1',
        )

        pairs = []
        for tag in tags:
            field = None
            content = None

            if tag.name == 'input':
                field = tag.get('name')
                content = tag.get('value')
                if tag.get('type') == 'image' or \
                        field == 'ctl00$ContentPlaceHolder2$btnExit':
                    continue

                if field == 'ctl00$ContentPlaceHolder2$btnShow':
                    content = content.encode('utf8')
                elif field in date_inputs:
                    content = search_date
            elif tag.name == 'select':
                field = tag.get('name')
                if field == 'ctl00$ContentPlaceHolder2$ddlType':
                    content = self.gztype
                elif field == 'ctl00$ContentPlaceHolder2$ddldepart':
                    content = '0'

            if not field:
                continue
            pairs.append((field, u'' if content is None else content))

        return pairs
Esempio n. 22
0
 def get_post_data(self, dateobj, city, pagenum):
     """Return the SOAP request body asking for one page of a date search.

     The envelope carries a GetUpdatedObjects operation with the GetPage
     operator. city fills the City operands, the date (formatted with '/' as
     separator) fills the Order Date operands, and pagenum, formatted with
     %d, fills __page. All other operands are fixed or empty.

     The values are substituted into the template as-is, without XML
     escaping.
     """
     datestr = utils.dateobj_to_str(dateobj, '/')
     # The template is a single literal; its five placeholders are, in order:
     # city, city, date, date, page number.
     postdata = '<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><GetUpdatedObjects xmlns="http://schemas.eclipse.org/birt"><Operation><Target><Id>Document</Id><Type>Document</Type></Target><Operator>GetPage</Operator><Oprand><Name>City</Name><Value>%s</Value></Oprand><Oprand><Name>__isdisplay__City</Name><Value>%s</Value></Oprand><Oprand><Name>Serial No</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Serial No</Name><Value></Value></Oprand><Oprand><Name>Appeal No</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Appeal No</Name><Value></Value></Oprand><Oprand><Name>Assessee Name</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Assessee Name</Name><Value></Value></Oprand><Oprand><Name>searchWhat</Name><Value>searchByDate</Value></Oprand><Oprand><Name>__isdisplay__searchWhat</Name><Value>searchByDate</Value></Oprand><Oprand><Name>Order Date</Name><Value>%s</Value></Oprand><Oprand><Name>__isdisplay__Order Date</Name><Value>%s</Value></Oprand><Oprand><Name>Member Name</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Member Name</Name><Value></Value></Oprand><Oprand><Name>Pronouncement Date</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Pronouncement Date</Name><Value></Value></Oprand><Oprand><Name>__page</Name><Value>%d</Value></Oprand><Oprand><Name>__svg</Name><Value>true</Value></Oprand></Operation></GetUpdatedObjects></soap:Body></soap:Envelope>' % (city, city, datestr, datestr, pagenum)
     return postdata
Esempio n. 23
0
    def download_oneday(self, relpath, dateobj):
        """Download the contents PDF for one date and every part it links to.

        The directory name substituted into self.baseurl and the name of the
        contents file depend on where dateobj falls relative to
        self.flip_date1 and self.flip_date2:

          * on or after flip_date2: 'Contents-(D-M-YYYY).pdf', unpadded
          * on or after flip_date1: 'Contents-(DD-MM-YYYY).pdf', zero-padded
          * before flip_date1: 'Contents(DD-MM-YY).pdf', under a directory
            named by utils.dateobj_to_str(dateobj, '', reverse=True)

        The contents file is stored under '<relpath>/main'. Every link found
        in it is saved with save_gazette() under '<relpath>/<part name>', and
        the link list is recorded in the metainfo of the main document.

        Returns the relative paths that were saved or updated. The list is
        empty when the contents file cannot be downloaded or is not a PDF.
        """
        dls = []
        if dateobj >= self.flip_date1:
            if dateobj >= self.flip_date2:
                datestr = '%d-%d-%d' % (dateobj.day, dateobj.month,
                                        dateobj.year)
            else:
                datestr = '%s-%s-%d' % (utils.pad_zero(dateobj.day),
                                        utils.pad_zero(dateobj.month),
                                        dateobj.year)
            mainhref = 'Contents-(%s).pdf' % datestr
        else:
            datestr = utils.dateobj_to_str(dateobj, '', reverse=True)
            mainhref = 'Contents(%s-%s-%s).pdf' % (
                utils.pad_zero(dateobj.day),
                utils.pad_zero(dateobj.month),
                utils.pad_zero(dateobj.year % 100))

        dateurl = self.baseurl % datestr
        docurl = urllib.basejoin(dateurl, mainhref)

        mainmeta = utils.MetaInfo()
        mainmeta.set_date(dateobj)
        mainmeta.set_url(self.url_fix(docurl))

        response = self.download_url(docurl)
        if not response or not response.webpage or response.error:
            return dls

        mainrelurl = os.path.join(relpath, 'main')
        updated = False
        if self.storage_manager.save_rawdoc(self.name, mainrelurl,
                                            response.srvresponse,
                                            response.webpage):
            self.logger.info(u'Saved rawfile %s' % mainrelurl)
            updated = True

        page_type = self.get_file_extension(response.webpage)
        if page_type != 'pdf':
            # Logger.warn() is only a deprecated alias of warning().
            self.logger.warning(
                'Got a non-pdf page and we can\'t handle it for datte %s',
                dateobj)
            return dls

        links = []
        linknames = []
        hrefs = utils.extract_links_from_pdf(StringIO(response.webpage))
        for href in hrefs:
            reobj = re.search(r'(?P<num>Part-\w+)', href)
            if reobj:
                partnum = reobj.group('num')
            else:
                # No 'Part-...' token: use the link itself, minus a trailing
                # '.pdf'. The guard is on the match object, so a link with no
                # '.pdf' suffix is kept whole instead of raising on None.
                partnum = '%s' % href
                reobj = re.search(r'\.pdf$', partnum)
                if reobj:
                    partnum = partnum[:reobj.start()]

            relurl = os.path.join(relpath, partnum)
            parturl = urllib.basejoin(dateurl, href)

            metainfo = utils.MetaInfo()
            metainfo.set_date(dateobj)
            metainfo['partnum'] = partnum

            # Links are recorded whether or not the part gets saved below.
            links.append(relurl)
            linknames.append(partnum)

            if self.save_gazette(relurl, parturl, metainfo):
                dls.append(relurl)

        mainmeta['links'] = links
        mainmeta['linknames'] = linknames
        if self.storage_manager.save_metainfo(self.name, mainrelurl, mainmeta):
            updated = True
            self.logger.info(u'Saved metainfo %s' % mainrelurl)

        if updated:
            dls.append(mainrelurl)

        return dls
Esempio n. 24
0
    def download_oneday(self, relpath, dateobj):
        """Download every order listed on the date-wise page for one date.

        Fetches orderdatewisedata.jsp with the date (formatted with '-') as
        both fdate and tdate, passes each window.open(...) call in the page
        through self.sanitize_windowopen, and then walks the table rows. For
        each row whose link has a showoj.jsp reference in its onclick
        attribute, the document is downloaded into self.rawdir unless it is
        already there. Its metadata is written into self.metadir when it is
        missing or when self.updateMeta is set.

        Returns the relative paths of the documents downloaded by this call.
        """
        newdls  = []

        pageurl = urllib.basejoin(self.baseurl, '/gujarathc/')

        datestr = utils.dateobj_to_str(dateobj, '-')
        dateurl = pageurl + 'orderdatewisedata.jsp?fdate=%s&tdate=%s' % \
                                (datestr, datestr)

        webpage = self.download_url(dateurl, referer = self.baseurl,
                                    loadcookies = self.cookiefile.name)

        if not webpage:
            self.logger.warning(u'No webpage for %s' % dateurl)
            return newdls

        webpage = re.sub(r'(?P<windowopen>window.open\([^)]+\))',
                         self.sanitize_windowopen, webpage)

        d = utils.parse_webpage(webpage)

        if not d:
            self.logger.error(u'Could not parse html of the result page for date %s' % dateobj)
            return newdls

        for tr in d.findAll('tr'):
            link = tr.find('a')
            if not link:
                self.logger.info(u'No link in %s' % tr)
                continue

            href = link.get('onclick')
            if not href:
                self.logger.info(u'No href in %s' % tr)
                continue

            # NOTE(review): the '.' and '?' are unescaped, so '?' makes the
            # final 'p' optional rather than matching a literal '?'. The
            # pattern is left exactly as it was.
            reobj = re.search(r"showoj.jsp?[^'\s]+", href)
            if not reobj:
                # Skip this row instead of failing the whole day on a link
                # that does not point at showoj.jsp.
                self.logger.info(u'No showoj.jsp link in %s' % tr)
                continue

            pagerelurl = reobj.group(0)
            url = urllib.basejoin(pageurl, pagerelurl)

            filename = utils.url_to_filename(url, False, ['caseyr', 'caseno',
                                                          'casetype'])

            if not filename:
                self.logger.error(u'Could not get filename for %s' % url)
                continue
            relurl   = os.path.join(relpath, filename)
            filepath = os.path.join(self.rawdir, relurl)
            metapath = os.path.join(self.metadir, relurl)

            if not os.path.exists(filepath):
                self.logger.info(u'Downloading %s %s' % (url, filename))
                j = self.download_url(url, loadcookies = self.cookiefile.name)

                if not j:
                    self.logger.warning(u'No webpage: %s' % url)
                else:
                    self.logger.info(u'Saving %s' % filepath)
                    utils.save_file(filepath, j)
                    newdls.append(relurl)

            # Metadata is written only for documents that exist on disk.
            if os.path.exists(filepath) and \
                    (self.updateMeta or not os.path.exists(metapath)):
                metainfo = self.get_meta_info(link, tr, dateobj)
                if metainfo:
                    utils.print_tag_file(metapath, metainfo)

        return newdls
Esempio n. 25
0
 def get_post_data(self, dateobj, city, pagenum):
     """Return the SOAP request body asking for one page of a date search.

     The envelope carries a GetUpdatedObjects operation with the GetPage
     operator. city fills the City operands, the date (formatted with '/' as
     separator) fills the Order Date operands, and pagenum, formatted with
     %d, fills __page. All other operands are fixed or empty.

     The values are substituted into the template as-is, without XML
     escaping.
     """
     datestr = utils.dateobj_to_str(dateobj, '/')
     # The template is a single literal; its five placeholders are, in order:
     # city, city, date, date, page number.
     postdata = '<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><GetUpdatedObjects xmlns="http://schemas.eclipse.org/birt"><Operation><Target><Id>Document</Id><Type>Document</Type></Target><Operator>GetPage</Operator><Oprand><Name>City</Name><Value>%s</Value></Oprand><Oprand><Name>__isdisplay__City</Name><Value>%s</Value></Oprand><Oprand><Name>Serial No</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Serial No</Name><Value></Value></Oprand><Oprand><Name>Appeal No</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Appeal No</Name><Value></Value></Oprand><Oprand><Name>Assessee Name</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Assessee Name</Name><Value></Value></Oprand><Oprand><Name>searchWhat</Name><Value>searchByDate</Value></Oprand><Oprand><Name>__isdisplay__searchWhat</Name><Value>searchByDate</Value></Oprand><Oprand><Name>Order Date</Name><Value>%s</Value></Oprand><Oprand><Name>__isdisplay__Order Date</Name><Value>%s</Value></Oprand><Oprand><Name>Member Name</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Member Name</Name><Value></Value></Oprand><Oprand><Name>Pronouncement Date</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Pronouncement Date</Name><Value></Value></Oprand><Oprand><Name>__page</Name><Value>%d</Value></Oprand><Oprand><Name>__svg</Name><Value>true</Value></Oprand></Operation></GetUpdatedObjects></soap:Body></soap:Envelope>' % (city, city, datestr, datestr, pagenum)
     return postdata
Esempio n. 26
0
 def date_in_form(self, dateobj):
     """Return the date as a one-item list holding a ('juddt', date) pair.

     The date is formatted by utils.dateobj_to_str() with '/' as separator.
     """
     juddt = utils.dateobj_to_str(dateobj, '/')
     return [('juddt', juddt)]