def DiagnoseCMS(uobj):
    '''Receive a urlopener object and diagnose what sort of CMS the site uses.

    uobj must provide ``info()``, whose result provides ``gettype()``; it is
    also handed to ``CMSciteseerx.diagnose``.

    Returns one of the strings 'CMS_citeseerx', 'CMS_normal' or 'CMS_pdf'.

    Raises Exception when the response's MIME type is missing from MIMEMAP
    or maps to something other than 'html' or 'pdf'.
    '''
    uoi = uobj.info()

    # The site-specific check takes priority over the generic MIME dispatch.
    if CMSciteseerx.diagnose(uobj):
        return 'CMS_citeseerx'

    # Look the MIME type up once. A type that is absent from MIMEMAP is
    # treated like any other unsupported type, so the caller gets the
    # descriptive error below rather than a bare KeyError.
    try:
        kind = MIMEMAP[uoi.gettype()]
    except KeyError:
        kind = None

    if kind == 'html':
        return 'CMS_normal'
    if kind == 'pdf':
        return 'CMS_pdf'

    # Call-form raise is equivalent to the old "raise Exception, msg" on
    # Python 2 and is also valid syntax on Python 3.
    raise Exception('This is not a handlable file type')