Example #1
0
    def get(self):
        """Scrape one department page and store doctors not yet in the datastore.

        Reads the integer ``code`` query parameter (default ``0``), selects
        the first ``Department`` whose ``dptCode`` is >= that value, downloads
        its ``dptLink`` page and walks every ``<a>`` inside the first
        ``<table>``.  Each anchor's text is split into a doctor name (the part
        before the first digit) and a doctor code; a ``Doctor`` entity is
        stored when the code is non-empty and not already present.

        Finally renders ``templates/parser.html`` with a link to the next
        department, or an "END OF PARSING" link to ``/`` when no department
        is left.

        Raises:
            ValueError: if the ``code`` query parameter is not an integer.
        """
        code = int(self.request.get('code', '0'))
        q = Department.gql('WHERE dptCode >= :1 ORDER BY dptCode', code)
        # Two rows: the department to parse now, plus the one the "next"
        # link should point at.
        dpts = q.fetch(limit=2)

        # An empty result (no departments stored, or ``code`` beyond the last
        # dptCode) means there is nothing to parse; fall through to the
        # "END OF PARSING" page instead of failing on dpts[0].
        if dpts:
            nowDpt = dpts[0]

            response = urllib2.urlopen(nowDpt.dptLink)
            try:
                soup = BeautifulSoup(response)
            finally:
                # The parser has consumed the body; release the connection.
                response.close()

            table = soup.table
            anchors = []
            if table is not None:  # page without a <table>: nothing to import
                anchors = table.findAll('a')

            for anchor in anchors:
                text = anchor.text
                # Name is everything before the first digit.
                name = re.split(r'[0-9]', text)[0]
                # Code is what follows the name, cut at the first space or
                # '(' to deal with the special cases.
                docCode = text[len(name):].split(' ')[0].split('(')[0]
                if not docCode:
                    # No code found: never stored, so skip the lookup too.
                    continue
                if Doctor.all().filter('docCode =', docCode).get() is not None:
                    continue  # already in the datastore

                doc = Doctor()
                doc.docName = name
                doc.docCode = docCode
                doc.put()

        if len(dpts) > 1:
            nextDpt = dpts[1]
            nextUrl = '/parse/doctor?code=%d' % nextDpt.dptCode
            nextName = nextDpt.dptName
        else:
            nextUrl = '/'
            nextName = 'END OF PARSING'

        context = {
            'type': 'Doctor',
            'nextUrl': nextUrl,
            'nextName': nextName,
        }
        # NOTE(review): '__file__' is a string literal here, so dirname()
        # returns '' and the template path is relative to the working
        # directory.  Probably meant ``__file__``; left as-is because changing
        # it could alter where the template is looked up -- confirm.
        path = os.path.join(os.path.dirname('__file__'), 'templates', 'parser.html')
        self.response.out.write(template.render(path, context))
Example #2
0
    def get(self):
        """Scrape one department's schedule page and store its clinics.

        Reads the integer ``code`` query parameter (default ``0``), selects
        the first ``Department`` whose ``dptCode`` is >= that value, downloads
        its ``dptLink`` page and walks every ``<tr align="left">`` of the
        first ``<table>``.  In each row the first cell supplies the date (the
        ``month/day`` text between parentheses) and each following cell is a
        time slot: second cell ``'A'``, third cell ``'B'``, any later cell
        ``'C'``.  For every two-attribute ``<a>`` in a slot cell whose name
        (text before the first digit) matches a stored ``Doctor``, a
        ``Clinic`` entity is written.

        Finally renders ``templates/parser.html`` with a link to the next
        department, or an "END OF PARSING" link to ``/`` when no department
        is left.

        NOTE(review): a new ``Clinic`` is created on every match with no
        existence check, so re-running the handler for the same department
        presumably stores duplicates -- confirm whether that is intended.

        Raises:
            ValueError: if the ``code`` query parameter is not an integer.
            IndexError: if a row's first cell has no ``(month/day)`` part or
                a clinic link has no ``data=`` parameter.
        """
        code = int(self.request.get('code', '0'))
        q = Department.gql('WHERE dptCode >= :1 ORDER BY dptCode', code)
        # Two rows: the department to parse now, plus the one the "next"
        # link should point at.
        dpts = q.fetch(limit=2)

        # An empty result (no departments stored, or ``code`` beyond the last
        # dptCode) means there is nothing to parse; fall through to the
        # "END OF PARSING" page instead of failing on dpts[0].
        if dpts:
            nowDpt = dpts[0]

            response = urllib2.urlopen(nowDpt.dptLink)
            try:
                soup = BeautifulSoup(response)
            finally:
                # The parser has consumed the body; release the connection.
                response.close()

            table = soup.table
            rows = []
            if table is not None:  # page without a <table>: nothing to import
                rows = table.findAll('tr', align='left')

            # The page only carries month/day, so the current year is used.
            # NOTE(review): around New Year this may stamp the wrong year on
            # dates that belong to the following year -- confirm.
            year = str(datetime.datetime.now().year)
            # Cell position -> slot letter; every cell past the third is 'C'.
            slotByColumn = {1: 'A', 2: 'B'}

            for tr in rows:
                cells = tr.findAll('td')
                if not cells:
                    continue

                # First cell holds the date as "...(month/day)...".
                dateStr = cells[0].text.split('(')[1].split(')')[0]
                dateParts = dateStr.split('/')
                month = dateParts[0]
                day = dateParts[1]

                for column, td in enumerate(cells):
                    if column == 0:
                        continue  # date cell, handled above
                    timeStr = slotByColumn.get(column, 'C')

                    # Only anchors carrying exactly two attributes are
                    # treated as clinic links.
                    links = td.findAll(
                        lambda tag: tag.name == 'a' and len(tag.attrs) == 2)
                    for a in links:
                        name = re.split(r'[0-9]', a.text)[0]
                        doc = Doctor.all().filter('docName = ', name).get()
                        if not doc:
                            continue  # unknown doctor: skip this clinic

                        link = a['href']
                        # Clinic code is the value of the "data" parameter,
                        # up to the following "&sLoc".
                        clinicCode = link.split('data=')[1].split('&sLoc')[0]

                        clinic = Clinic()
                        clinic.link = tzuPrifix + link
                        clinic.code = clinicCode
                        clinic.doctor = doc.key()
                        clinic.dept = nowDpt.key()
                        clinic.date = '-'.join([year, month, day, timeStr])
                        clinic.put()

        if len(dpts) > 1:
            nextDpt = dpts[1]
            nextUrl = '/parse/clinic?code=%d' % nextDpt.dptCode
            nextName = nextDpt.dptName
        else:
            nextUrl = '/'
            nextName = 'END OF PARSING'

        context = {
            'type': 'Clinic',
            'nextUrl': nextUrl,
            'nextName': nextName,
        }
        # NOTE(review): '__file__' is a string literal here, so dirname()
        # returns '' and the template path is relative to the working
        # directory.  Probably meant ``__file__``; left as-is because changing
        # it could alter where the template is looked up -- confirm.
        path = os.path.join(os.path.dirname('__file__'), 'templates', 'parser.html')
        self.response.out.write(template.render(path, context))