Ejemplo n.º 1
0
 def _init_agent_info(self):
     """Parse the `User-Agent' header."""
     self.m_agentinfo = None
     agent = self.header('User-Agent')
     if not agent:
         return
     agentinfo = http.parse_user_agent(agent)
     self.m_agentinfo = agentinfo
Ejemplo n.º 2
0
 def test_parse_agent(self):
     """Check each sample in `self.user_agents` parses to its reference value."""
     for raw_agent, expected in self.user_agents:
         assert http.parse_user_agent(raw_agent) == expected
Ejemplo n.º 3
0
# NOTE(review): presumably the name of the merged output file; it is not used
# in the part of the script visible here -- confirm against the rest.
merged = 'robots.ini'
# Lower-cased agent values that are skipped while collecting signatures below.
exclude = ('???', 'no', 'yes', 'none', 'not available')


# Step 1: read the robot list from `url` (defined outside this excerpt) and
# collect the distinct agent names found in its `robot-useragent:' records.
print 'Downloading robot signatures from: %s' % url
robots = []
fin = urllib.urlopen(url)
for line in fin:
    # NOTE(review): lines produced by iteration normally keep their newline,
    # so this guard presumably never fires -- confirm before relying on it.
    if not line:
        break
    # Ignore every record except the `robot-useragent:' ones.
    if not line.startswith('robot-useragent:'):
        continue
    # 16 == len('robot-useragent:'): drop the field name, then trim
    # whitespace and lower-case the value.
    agent = line[16:].strip().lower()
    if not agent or agent in exclude:
        continue
    # Only the first element of the parsed triple is kept; `version` and
    # `info` are not used afterwards.
    agent, version, info = http.parse_user_agent(agent)
    # Record each agent name once.
    if agent not in robots:
        robots.append(agent)
fin.close()
print 'Done (%d signatures)' % len(robots)

# Step 2: write the sorted agent names, one per line, to `txtorg` (defined
# outside this excerpt).
print 'Write: %s' % txtorg
robots.sort()
fout = file(txtorg, 'w')
for line in robots:
    fout.write(line + '\n')
fout.close()

print 'Read: %s' % local
fin = file(local)
for line in fin: