def _init_agent_info(self):
    """Populate `m_agentinfo' from the request's `User-Agent' header.

    The attribute is always reset to None first.  It is replaced by the
    result of http.parse_user_agent() only when self.header() returns a
    non-empty value for `User-Agent'.
    """
    self.m_agentinfo = None
    user_agent = self.header('User-Agent')
    if user_agent:
        # Only parse when the header was actually supplied.
        self.m_agentinfo = http.parse_user_agent(user_agent)
def test_parse_agent(self):
    """Check http.parse_user_agent() against every reference result.

    `self.user_agents' is iterated as (agent string, expected result)
    pairs; the first mismatch fails the test via a bare assert.
    """
    for raw_agent, expected in self.user_agents:
        assert http.parse_user_agent(raw_agent) == expected
merged = 'robots.ini' exclude = ('???', 'no', 'yes', 'none', 'not available') print 'Downloading robot signatures from: %s' % url robots = [] fin = urllib.urlopen(url) for line in fin: if not line: break if not line.startswith('robot-useragent:'): continue agent = line[16:].strip().lower() if not agent or agent in exclude: continue agent, version, info = http.parse_user_agent(agent) if agent not in robots: robots.append(agent) fin.close() print 'Done (%d signatures)' % len(robots) print 'Write: %s' % txtorg robots.sort() fout = file(txtorg, 'w') for line in robots: fout.write(line + '\n') fout.close() print 'Read: %s' % local fin = file(local) for line in fin: