# madhyapradesh.py
# forked from sushant354/judis-re
import utils
import tempfile
import urllib
import string
import os
import re
class MadhyaPradesh(utils.BaseCourt):
    """Scraper for daily judgment/order listings of the Madhya Pradesh
    High Court (ldemo.mp.nic.in).

    Workflow: fetch a session cookie, POST a date-restricted query to the
    order-search form, then walk the paginated result table, saving each
    judgment file under rawdir and its metadata under metadir.
    """

    def __init__(self, name, rawdir, metadir, statsdir, updateMeta = False):
        utils.BaseCourt.__init__(self, name, rawdir, metadir, statsdir, updateMeta)
        # Session cookies live in a temp file for the lifetime of this object;
        # NamedTemporaryFile deletes it automatically on close/GC.
        self.cookiefile = tempfile.NamedTemporaryFile()
        self.hostname = 'ldemo.mp.nic.in'
        self.baseurl = 'http://ldemo.mp.nic.in'
        # Page that sets the session cookie.
        self.cookieurl = urllib.basejoin(self.baseurl, \
                                         'causelist/ciskiosk/ordermain.php')
        # Form-action URL that serves date-wise search results.
        self.dateurl = urllib.basejoin(self.baseurl, \
                                       '/causelist/ciskiosk/order_action.php?as9=ok3')

    def date_in_form(self, dateobj):
        """Return dateobj formatted as the dd/mm/yyyy string the site's form expects."""
        return '%s/%s/%s' % (utils.pad_zero(dateobj.day), \
                             utils.pad_zero(dateobj.month), \
                             utils.pad_zero(dateobj.year))

    def get_cookies(self):
        """Prime the session by fetching the cookie page; cookies are saved
        into self.cookiefile for later requests."""
        self.download_url(self.cookieurl, savecookies = self.cookiefile.name)

    def get_meta_info(self, tr, dateobj):
        """Extract metadata (petitioner, respondent, caseno, date) from one
        result-table row.

        A cell containing ' vs ' is split into petitioner/respondent; the
        third cell (index 2) is taken as the case number.
        """
        metainfo = { 'date': utils.date_to_xml(dateobj)}
        tds = tr.findAll('td')
        i = 0
        for td in tds:
            txt = utils.get_tag_contents(td)
            if txt:
                reobj = re.search(' vs ', txt, re.IGNORECASE)
                if reobj:
                    # Strip whitespace and the dashes the site uses as filler.
                    petitioner = txt[:reobj.start()].strip(' \r\n-')
                    respondent = txt[reobj.end():].strip(' \r\n-')
                    if petitioner:
                        metainfo['petitioner'] = petitioner
                    if respondent:
                        metainfo['respondent'] = respondent
                elif i == 2:
                    metainfo['caseno'] = txt
            i += 1
        return metainfo

    def download_judgment(self, link, filepath):
        """Download one judgment (relative link) to filepath.

        Returns True on success, False if the download yielded nothing.
        """
        url = urllib.basejoin(self.dateurl, link)
        self.logger.info(u'Downloading link %s' % url)
        webpage = self.download_url(url, loadcookies = self.cookiefile.name)
        if webpage:
            utils.save_file(filepath, webpage)
            return True
        else:
            return False

    def handle_judgment_link(self, relpath, dateobj, tr):
        """Process one judgment row: download the file and write metadata.

        The filename is derived from the case number (preferred) or from
        the party names. Returns the relative path of the saved judgment,
        or None if the row could not be handled.
        """
        links = tr.findAll('a')
        if len(links) >= 1:
            href = links[-1].get('href')
        else:
            return None
        # Guard: an anchor without an href attribute yields None.
        if not href:
            return None

        metainfo = self.get_meta_info(tr, dateobj)

        rel = ''
        if 'caseno' in metainfo:
            rel += metainfo['caseno']
        else:
            if 'petitioner' in metainfo:
                rel += metainfo['petitioner']
            if 'respondent' in metainfo:
                rel += metainfo['respondent']
        if not rel:
            return None
        # Case numbers contain '/', which is illegal in filenames.
        rel = rel.replace('/', '-')

        tmprel = os.path.join(relpath, rel)
        filepath = os.path.join(self.rawdir, tmprel)
        if not os.path.exists(filepath):
            self.download_judgment(href, filepath)

        if os.path.exists(filepath):
            metapath = os.path.join(self.metadir, tmprel)
            if metainfo and (self.updateMeta or not os.path.exists(metapath)):
                utils.print_tag_file(metapath, metainfo)
            return tmprel
        else:
            return None

    def page_type(self, tr):
        """Classify a result-table row: 'judgment' (party names present),
        'nextlink' (pagination link), or 'unknown'."""
        text = utils.get_tag_contents(tr)
        if re.search(' vs ', text, re.IGNORECASE):
            return 'judgment'
        elif self.next_link(tr.findAll('a')):
            return 'nextlink'
        else:
            return 'unknown'

    def next_link(self, links):
        """Return the anchor whose text contains 'Next', or None."""
        for link in links:
            contents = utils.get_tag_contents(link)
            # Guard against empty/None tag contents.
            if contents and 'Next' in contents:
                return link
        return None

    def process_next_link(self, relpath, dateobj, nextlink):
        """Follow a pagination link and process the page it points to."""
        url = urllib.basejoin(self.dateurl, nextlink.get('href'))
        webpage = self.download_url(url, loadcookies = self.cookiefile.name)
        return self.process_result_page(relpath, dateobj, webpage)

    def process_result_page(self, relpath, dateobj, webpage):
        """Walk every row of a result page, downloading judgments and
        recursing into pagination links.

        Returns the list of relative paths newly downloaded.
        """
        newdls = []
        d = utils.parse_webpage(webpage)
        if not d:
            self.logger.info(u'Could not parse result page for date %s' % dateobj)
            return newdls

        trs = d.findAll('tr')
        for tr in trs:
            pagetype = self.page_type(tr)
            if pagetype == 'nextlink':
                nextlink = self.next_link(tr.findAll('a'))
                if nextlink:
                    self.logger.info(u'Going to the next page: %s' \
                                     % utils.get_tag_contents(nextlink))
                    rels = self.process_next_link(relpath, dateobj, nextlink)
                    newdls.extend(rels)
            elif pagetype == 'judgment':
                rel = self.handle_judgment_link(relpath, dateobj, tr)
                if rel:
                    newdls.append(rel)
            else:
                self.logger.info(u'Not processing %s' % tr)
        return newdls

    def download_oneday(self, relpath, dateobj):
        """Entry point: download all judgments for a single date.

        Returns the list of relative paths newly downloaded.
        """
        self.get_cookies()
        # Field names/values mirror the site's search form; only the
        # order-date field (orddate3) is filled in.
        postdata = [('pageno', '1'), ('m_hc', '01'), ('mskey', ''), \
                    ('m_no', ''), ('m_yr', ''), ('jud1', 0), \
                    ('orddate1', ''), ('orddate2', ''), \
                    ('orddate3', self.date_in_form(dateobj)), \
                    ('petres', 'N'), ('m_party', ''), ('orddate4', ''), \
                    ('orddate5', '')]
        webpage = self.download_url(self.dateurl, postdata = postdata, \
                                    loadcookies = self.cookiefile.name)
        return self.process_result_page(relpath, dateobj, webpage)