import sys

import requests

from wikihandy import Wikihandy  # helper library; import path assumed


class Crawler(object):
    def __init__(self):
        # Helper for wiki access
        self.wh = Wikihandy()

        # Reference information for data pushed to the wikibase
        self.reference = [
            (self.wh.get_pid('source'), self.wh.get_qid('RIPE NCC')),
            (self.wh.get_pid('reference URL'), URL_RIPE_AS_NAME),
            (self.wh.get_pid('point in time'), self.wh.today())
        ]

    def run(self):
        """Fetch the AS name file from the RIPE website and process lines one
        by one."""

        req = requests.get(URL_RIPE_AS_NAME)
        if req.status_code != 200:
            sys.exit('Error while fetching AS names')

        self.wh.login()  # Login once for all threads, not needed with OAuth

        for i, res in enumerate(map(self.update_asn, req.text.splitlines())):
            sys.stderr.write(f'\rProcessed {i+1} ASes')

    def update_asn(self, one_line):
        # Parse the given line to get ASN, name, and country code
        asn, _, name_cc = one_line.partition(' ')
        name, _, cc = name_cc.rpartition(', ')

        asn_qid = self.wh.asn2qid(asn, create=True)
        cc_qid = self.wh.country2qid(cc, create=True)

        statements = []
        statements.append(
            [self.wh.get_pid('name'), name, self.reference])  # Set AS name
        if cc_qid is not None:
            statements.append(
                [self.wh.get_pid('country'), cc_qid, self.reference])  # Set country

        try:
            # Update AS name and country
            self.wh.upsert_statements('updates from RIPE AS names', asn_qid,
                                      statements)
        except Exception as error:
            # Print errors and continue running
            print('Error for: ', one_line)
            print(error)

        return asn_qid
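
# Usage sketch for the crawler above. URL_RIPE_AS_NAME is referenced by the
# class but not defined in this excerpt; the value below is an assumption
# (RIPE NCC publishes its AS name dump at this well-known location).
URL_RIPE_AS_NAME = 'https://ftp.ripe.net/ripe/asnames/asn.txt'  # assumed value

if __name__ == '__main__':
    crawler = Crawler()
    crawler.run()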
class Crawler(object): def __init__(self): """Fetch QIDs for MANRS actions (create them if they are not in the wikibase).""" # Helper for wiki access self.wh = Wikihandy() # Actions defined by MANRS self.actions = [{ 'label': 'MANRS Action 1: Filtering', 'description': 'Prevent propagation of incorrect routing information' }, { 'label': 'MANRS Action 2: Anti-spoofing', 'description': 'Prevent traffic with spoofed source IP addresses' }, { 'label': 'MANRS Action 3: Coordination', 'description': 'Facilitate global operational communication and coordination' }, { 'label': 'MANRS Action 4: Global Validation', 'description': 'Facilitate routing information on a global scale' }] # Get the QID for the four items representing MANRS actions for action in self.actions: action['qid'] = self.wh.get_qid( action['label'], create={ # Create it if it doesn't exist 'summary': 'add MANRS actions', # Commit message 'description': action['description'] # Item description }) # Added properties will have this additional information today = self.wh.today() self.reference = [(self.wh.get_pid('source'), self.wh.get_qid('MANRS')), (self.wh.get_pid('reference URL'), URL_MANRS), (self.wh.get_pid('point in time'), today)] def run(self): """Fetch networks information from MANRS and push to wikibase. """ req = requests.get(URL_MANRS) if req.status_code != 200: sys.exit('Error while fetching MANRS csv file') for i, row in enumerate(req.text.splitlines()): # Skip the header if i == 0: continue self.update_net(row) sys.stderr.write(f'\rProcessed {i} organizations') def update_net(self, one_line): """Add the network to wikibase if it's not already there and update its properties.""" _, areas, asns, act1, act2, act3, act4 = [ col.strip() for col in one_line.split(',') ] # Properties statements = [ [ self.wh.get_pid('member of'), self.wh.get_qid('MANRS'), self.reference ], ] # set countries for cc in areas.split(';'): statements.append([ self.wh.get_pid('country'), self.wh.country2qid(cc), self.reference ]) # set actions for i, action_bool in enumerate([act1, act2, act3, act4]): if action_bool == 'Yes': statements.append([ self.wh.get_pid('implements'), self.actions[i]['qid'], self.reference ]) # Commit to wikibase for asn in asns.split(';'): if asn: # ignore organizations with no ASN # Get the AS QID (create if AS is not yet registered) and commit changes net_qid = self.wh.asn2qid(asn, create=True) self.wh.upsert_statements('update from MANRS membership', net_qid, statements)
class Crawler(object): def __init__(self): """Initialize wikihandy """ # Helper for wiki access self.wh = Wikihandy() # Added properties will have this additional information self.org_qid = self.wh.get_qid(ORG) self.countries = iso3166.countries_by_alpha2 # Session object to fetch peeringdb data retries = Retry(total=15, backoff_factor=0.2, status_forcelist=[104, 500, 502, 503, 504]) self.http_session = requests.Session() self.http_session.mount('https://', HTTPAdapter(max_retries=retries)) def run(self): """Fetch data from API and push to wikibase. """ for cc, country in self.countries.items(): # Query IHR self.url = URL_API.format(country=cc) req = self.http_session.get(self.url + '&format=json') if req.status_code != 200: sys.exit('Error while fetching data for ' + cc) data = json.loads(req.text) ranking = data['results'] # Setup references today = self.wh.today() self.references = [ (self.wh.get_pid('source'), self.org_qid), (self.wh.get_pid('reference URL'), self.url), (self.wh.get_pid('point in time'), today), ] # Setup qualifiers country_qid = self.wh.country2qid(country.name) if country_qid is not None: self.qualifiers = [(self.wh.get_pid('country'), country_qid)] else: self.qualifiers = [] # Find the latest timebin in the data last_timebin = '1970-01-01' for r in ranking: if arrow.get(r['timebin']) > arrow.get(last_timebin): last_timebin = r['timebin'] # Make ranking and push data for metric, weight in [('Total eyeball', 'eyeball'), ('Total AS', 'as')]: # Get the QID of the selected country / create this country if needed self.countryrank_qid = self.wh.get_qid( f'IHR country ranking: {metric} ({cc})', create={ # Create it if it doesn't exist 'summary': f'add IHR {metric} ranking for ' + cc, 'description': f"IHR's ranking of networks ({metric}) for " + country.name, 'statements': [[self.wh.get_pid('managed by'), self.org_qid]] }) # Filter out unnecessary data selected = [ r for r in ranking if (r['weightscheme'] == weight and r['transitonly'] == False and r['hege'] > MIN_HEGE and r['timebin'] == last_timebin) ] # Make sure the ranking is sorted and add rank field selected.sort(key=lambda x: x['hege'], reverse=True) for i, asn in enumerate(selected): asn['rank'] = i # Push data to wiki for i, res in enumerate(map(self.update_entry, selected)): sys.stderr.write( f'\rProcessing {country.name}... {i+1}/{len(selected)}' ) sys.stderr.write('\n') def update_entry(self, asn): """Add the network to wikibase if it's not already there and update its properties.""" # Properties statements = [] # set rank statements.append([ self.wh.get_pid('ranking'), { 'amount': asn['rank'], 'unit': self.countryrank_qid, }, self.references, self.qualifiers ]) # Commit to wikibase # Get the AS QID (create if AS is not yet registered) and commit changes net_qid = self.wh.asn2qid(asn['asn'], create=True) self.wh.upsert_statements('update from IHR country ranking', net_qid, statements, asynchronous=False)
class Crawler(object): def __init__(self): """Initialize wikihandy and qualifiers for pushed data""" # Helper for wiki access self.wh = Wikihandy() # Added properties will have this additional information today = self.wh.today() self.caida_qid = self.wh.get_qid('CAIDA') # Get the QID for ASRank project self.asrank_qid = self.wh.get_qid( 'CAIDA ASRank', create={ # Create it if it doesn't exist 'summary': 'add CAIDA ASRank', # Commit message 'description': "CAIDA's AS ranking derived from topological data collected by CAIDA's Archipelago Measurement Infrastructure and BGP routing data collected by the Route Views Project and RIPE NCC.", # Item description 'statements': [[self.wh.get_pid('managed by'), self.caida_qid]] }) self.reference = [(self.wh.get_pid('source'), self.caida_qid), (self.wh.get_pid('reference URL'), URL_API), (self.wh.get_pid('point in time'), today)] def run(self): """Fetch networks information from ASRank and push to wikibase. """ self.wh.login() # Login once for all threads pool = ThreadPoolExecutor() has_next = True i = 0 while has_next: req = requests.get(URL_API + f'?offset={i}') if req.status_code != 200: sys.exit('Error while fetching data from API') ranking = json.loads(req.text)['data']['asns'] has_next = ranking['pageInfo']['hasNextPage'] for res in pool.map(self.update_net, ranking['edges']): sys.stderr.write( f'\rProcessing... {i+1}/{ranking["totalCount"]}') i += 1 pool.shutdown() def update_net(self, asn): """Add the network to wikibase if it's not already there and update its properties.""" asn = asn['node'] # Properties statements = [] if asn['asnName']: statements.append( [self.wh.get_pid('name'), asn['asnName'], self.reference]) # set countries cc = asn['country']['iso'] if cc: statements.append([ self.wh.get_pid('country'), self.wh.country2qid(cc), self.reference ]) # set rank statements.append([ self.wh.get_pid('ranking'), { 'amount': asn['rank'], 'unit': self.asrank_qid, }, self.reference ]) # Commit to wikibase # Get the AS QID (create if AS is not yet registered) and commit changes net_qid = self.wh.asn2qid(asn['asn'], create=True) self.wh.upsert_statements('update from CAIDA ASRank', net_qid, statements)
class Crawler(object): def __init__(self): """Create an item representing the PeeringDB exchange point ID class if doesn't already exist. And fetch QIDs for exchange points already in the wikibase.""" # Helper for wiki access self.wh = Wikihandy() # Get the QID of the item representing PeeringDB IX IDs ixid_qid = self.wh.get_qid( IXID_LABEL, create={ # Create it if it doesn't exist 'summary': 'add PeeringDB ix IDs', # Commit message 'description': 'Identifier for an exchange point in the PeeringDB database' # Description }) # Load the QIDs for ix already available in the wikibase self.ixid2qid = self.wh.extid2qid(qid=ixid_qid) # Load the QIDs for peeringDB organizations self.orgid2qid = self.wh.extid2qid(label=ORGID_LABEL) # Added properties will have this reference information self.today = self.wh.today() self.reference = [(self.wh.get_pid('source'), self.wh.get_qid('PeeringDB')), (self.wh.get_pid('reference URL'), URL_PDB_IXS), (self.wh.get_pid('point in time'), self.today)] def run(self): """Fetch ixs information from PeeringDB and push to wikibase. Using multiple threads for better performances.""" req = requests.get(URL_PDB_IXS) if req.status_code != 200: sys.exit('Error while fetching IXs data') ixs = json.loads(req.text)['data'] self.wh.login() # Login once for all threads for i, ix in enumerate(ixs): # Get more info for this IX req = requests.get(f'{URL_PDB_IXS}/{ix["id"]}') if req.status_code != 200: sys.exit('Error while fetching IXs data') ix_info = json.loads(req.text)['data'][0] # Update info in wiki self.update_ix(ix_info) sys.stderr.write(f'\rProcessing... {i+1}/{len(ixs)}') def update_ix(self, ix): """Add the ix to wikibase if it's not already there and update its properties.""" # set property name statements = [[ self.wh.get_pid('instance of'), self.wh.get_qid('Internet exchange point') ], [self.wh.get_pid('name'), ix['name'].strip(), self.reference]] # link to corresponding organization org_qid = self.orgid2qid.get(str(ix['org_id'])) if org_qid is not None: statements.append( [self.wh.get_pid('managed by'), org_qid, self.reference]) else: print('Error this organization is not in wikibase: ', ix['org_id']) # set property country if ix['country']: country_qid = self.wh.country2qid(ix['country']) if country_qid is not None: statements.append( [self.wh.get_pid('country'), country_qid, self.reference]) # set property website if ix['website']: statements.append( [self.wh.get_pid('website'), ix['website'], self.reference]) # set traffic webpage if ix['url_stats']: statements.append([ self.wh.get_pid('website'), ix['url_stats'], # statement self.reference, # reference [ (self.wh.get_pid('instance of'), self.wh.get_qid('traffic statistics')), ] # qualifier ]) ix_qid = self.ix_qid(ix) # Update name, website, and organization for this IX self.wh.upsert_statements('update peeringDB ixs', ix_qid, statements) # update LAN corresponding to this IX if 'ixlan_set' in ix: for ixlan in ix['ixlan_set']: pfx_url = f'{URL_PDB_LAN}/{ixlan["id"]}' pfx_ref = [(self.wh.get_pid('source'), self.wh.get_qid('PeeringDB')), (self.wh.get_pid('reference URL'), pfx_url), (self.wh.get_pid('point in time'), self.today)] req = requests.get(pfx_url) if req.status_code != 200: sys.exit('Error while fetching IXs data') lans = json.loads(req.text)['data'] for lan in lans: for prefix in lan['ixpfx_set']: pfx_qid = self.wh.prefix2qid(prefix['prefix'], create=True) pfx_stmts = [[ self.wh.get_pid('instance of'), self.wh.get_qid('peering LAN'), pfx_ref ], [self.wh.get_pid('managed by'), ix_qid, pfx_ref]] 
self.wh.upsert_statements('update peeringDB ixlan', pfx_qid, pfx_stmts) return ix_qid def ix_qid(self, ix): """Find the ix QID for the given ix. If this ix is not yet registered in the wikibase then add it. Return the ix QID.""" # Check if the IX is in the wikibase if str(ix['id']) not in self.ixid2qid: # Set properties for this new ix ix_qualifiers = [ (self.wh.get_pid('instance of'), self.wh.get_qid(IXID_LABEL)), ] statements = [(self.wh.get_pid('instance of'), self.wh.get_qid('Internet exchange point')), (self.wh.get_pid('external ID'), str(ix['id']), [], ix_qualifiers)] # Add this ix to the wikibase ix_qid = self.wh.add_item('add new peeringDB IX', label=ix['name'], description=ix['name_long'], statements=statements) # keep track of this QID self.ixid2qid[str(ix['id'])] = ix_qid return self.ixid2qid[str(ix['id'])]
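
# Usage sketch. The labels and endpoints below are referenced above but not
# defined in this excerpt; the two URLs are PeeringDB's public API endpoints,
# the two labels are assumptions.
IXID_LABEL = 'PeeringDB IX ID'  # assumed label
ORGID_LABEL = 'PeeringDB org ID'  # assumed label
URL_PDB_IXS = 'https://peeringdb.com/api/ix'
URL_PDB_LAN = 'https://peeringdb.com/api/ixlan'

if __name__ == '__main__':
    crawler = Crawler()
    crawler.run()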
class Crawler(object): def __init__(self): """Initialize wikihandy and qualifiers for pushed data""" # Helper for wiki access self.wh = Wikihandy() # Added properties will have this additional information today = self.wh.today() self.apnic_qid = self.wh.get_qid('APNIC') self.url = URL_API # url will change for each country self.reference = [(self.wh.get_pid('source'), self.apnic_qid), (self.wh.get_pid('reference URL'), self.url), (self.wh.get_pid('point in time'), today)] self.countries = iso3166.countries_by_alpha2 def run(self): """Fetch data from APNIC and push to wikibase. """ self.wh.login() # Login once for all threads pool = ThreadPoolExecutor() for cc, country in self.countries.items(): # Get the QID of the selected country / create this country if needed self.countryrank_qid = self.wh.get_qid( f'APNIC eyeball estimates ({cc})', create={ # Create it if it doesn't exist 'summary': 'add APNIC eyeball estimates for ' + cc, 'description': "APNIC's AS population estimates" + "based on advertisement for " + country.name, 'statements': [ [self.wh.get_pid('managed by'), self.apnic_qid], [self.wh.get_pid('website'), URL_API], [self.wh.get_pid('country'), self.wh.country2qid(cc)], ] }) self.countrypercent_qid = self.wh.get_qid( f'% of Internet users in {country.name}', create={ # Create it if it doesn't exist 'summary': 'add APNIC eyeball estimates for ' + cc, 'description': "APNIC's AS population estimates" + "based on advertisement for " + country.name, 'statements': [ [self.wh.get_pid('managed by'), self.apnic_qid], [self.wh.get_pid('website'), URL_API], [self.wh.get_pid('country'), self.wh.country2qid(cc)], ] }) self.url = URL_API + f'{cc}/{cc}.asns.json?m={MIN_POP_PERC}' req = requests.get(self.url) if req.status_code != 200: sys.exit('Error while fetching data for ' + cc) ranking = json.loads(req.text) # Make sure the ranking is sorted and add rank field ranking.sort(key=lambda x: x['percent'], reverse=True) for i, asn in enumerate(ranking): asn['rank'] = i # Push data to wiki for i, res in enumerate(pool.map(self.update_net, ranking)): sys.stderr.write( f'\rProcessing {country.name}... {i+1}/{len(ranking)}') pool.shutdown() def update_net(self, asn): """Add the network to wikibase if it's not already there and update its properties.""" # Properties statements = [] # set name if asn['autnum']: statements.append( [self.wh.get_pid('name'), asn['autnum'], self.reference]) # set country if asn['cc']: statements.append([ self.wh.get_pid('country'), self.wh.country2qid(asn['cc']), self.reference ]) # set rank statements.append([ self.wh.get_pid('ranking'), { 'amount': asn['rank'], 'unit': self.countryrank_qid, }, self.reference ]) # set population statements.append([ self.wh.get_pid('population'), { 'amount': asn['percent'], 'unit': self.countrypercent_qid, }, self.reference ]) # Commit to wikibase # Get the AS QID (create if AS is not yet registered) and commit changes net_qid = self.wh.asn2qid(asn['as'], create=True) self.wh.upsert_statements('update from APNIC eyeball ranking', net_qid, statements)
class Crawler(object): def __init__(self): """ """ # Helper for wiki access self.wh = Wikihandy(preload=True) # Get the QID for RIPE Atlas self.atlas_qid = self.wh.get_qid( 'RIPE Atlas', create={ # Create it if it doesn't exist 'summary': 'add RIPE Atlas', # Commit message 'description': 'RIPE Atlas is a global, open, distributed Internet measurement platform, consisting of thousands of measurement devices that measure Internet connectivity in real time.', # Item description 'aliases': 'Atlas|atlas', 'statements': [[self.wh.get_pid('managed by'), self.wh.get_qid('RIPE NCC')]] }) # Get the QID for Atlas Probe self.atlas_probe_qid = self.wh.get_qid( 'Atlas probe', create={ # Create it if it doesn't exist 'summary': 'add RIPE Atlas', # Commit message 'description': 'RIPE Atlas probes form the backbone of the RIPE Atlas infrastructure.', # Item description 'aliases': 'RIPE Atlas probe|atlas probe|RIPE atlas probe', 'statements': [[self.wh.get_pid('part of'), self.atlas_qid]] }) # Get the QID for Atlas Anchor self.atlas_anchor_qid = self.wh.get_qid( 'Atlas anchor', create={ # Create it if it doesn't exist 'summary': 'add RIPE Atlas', # Commit message 'description': 'RIPE Atlas Anchors are located at hosts that can provide sufficient bandwidth to support a large number of incoming and outgoing measurements.', # Item description 'aliases': 'RIPE Atlas anchor|atlas anchor|RIPE atlas anchor', 'statements': [[self.wh.get_pid('part of'), self.atlas_qid]] }) # Get the QID of the item representing PeeringDB IX IDs self.probeid_qid = self.wh.get_qid( PROBEID_LABEL, create={ # Create it if it doesn't exist 'summary': 'add RIPE Atlas probes', # Commit message 'description': 'Identifier for a probe in the RIPE Atlas measurement platform' # Description }) # Load the QIDs for probes already available in the wikibase self.probeid2qid = self.wh.extid2qid(qid=self.probeid_qid) # Added properties will have this additional information today = self.wh.today() self.reference = [(self.wh.get_pid('source'), self.wh.get_qid('RIPE NCC')), (self.wh.get_pid('reference URL'), URL), (self.wh.get_pid('point in time'), today)] self.v4_qualifiers = [(self.wh.get_pid('IP version'), self.wh.get_qid('IPv4'))] self.v6_qualifiers = [(self.wh.get_pid('IP version'), self.wh.get_qid('IPv6'))] def run(self): """Fetch probe information from Atlas API and push to wikibase. """ next_page = URL while next_page is not None: req = requests.get(next_page) if req.status_code != 200: sys.exit('Error while fetching the blocklist') info = json.loads(req.text) next_page = info['next'] for i, probe in enumerate(info['results']): self.update_probe(probe) sys.stderr.write(f'\rProcessed {i+1} probes') sys.stderr.write(f'\n') def update_probe(self, probe): """Add the probe to wikibase if it's not already there and update its properties.""" # TODO add status, geometry (geo-location) and IPs? 
# Properties for this probe statements = [] if probe['is_anchor']: statements.append( [self.wh.get_pid('instance of'), self.atlas_probe_qid]) statements.append( [self.wh.get_pid('instance of'), self.atlas_anchor_qid]) if probe['asn_v4']: as_qid = self.wh.asn2qid(probe['asn_v4']) if as_qid: statements.append([ self.wh.get_pid('part of'), as_qid, self.reference, self.v4_qualifiers ]) if probe['asn_v6']: as_qid = self.wh.asn2qid(probe['asn_v6']) if as_qid: statements.append([ self.wh.get_pid('part of'), as_qid, self.reference, self.v6_qualifiers ]) if probe['prefix_v4']: prefix_qid = self.wh.prefix2qid(probe['prefix_v4']) if prefix_qid: statements.append( [self.wh.get_pid('part of'), prefix_qid, self.reference]) if probe['prefix_v6']: prefix_qid = self.wh.prefix2qid(probe['prefix_v6']) if prefix_qid: statements.append( [self.wh.get_pid('part of'), prefix_qid, self.reference]) if probe['country_code']: statements.append([ self.wh.get_pid('country'), self.wh.country2qid(probe['country_code']), self.reference ]) if probe['first_connected']: statements.append([ self.wh.get_pid('start time'), self.wh.to_wbtime(probe['first_connected']), self.reference ]) if 'name' in probe['status']: # Get the QIDs for probes status status_qid = self.wh.get_qid( f'RIPE Atlas probe status: {probe["status"]["name"]}', create={ # Create it if it doesn't exist 'summary': 'add RIPE Atlas probe status', # Commit message }) if probe['status_since']: statements.append([ self.wh.get_pid('status'), status_qid, self.reference, [(self.wh.get_pid('start time'), self.wh.to_wbtime(probe['status_since']))] ]) # set end time if the probe is abandonned if probe['status']['name'] == 'Abandoned' and probe['status_since']: statements.append([ self.wh.get_pid('end time'), self.wh.to_wbtime(probe['status_since']) ]) # Add probe tags for tag in probe['tags']: statements.append([ self.wh.get_pid('tag'), self.wh.get_qid(tag['name'], create={ 'summary': 'Add RIPE Atlas tag', }) ]) # Commit to wikibase # Get the probe QID (create if probe is not yet registered) and commit changes probe_qid = self.probe_qid(probe) self.wh.upsert_statements('update from RIPE Atlas probes', probe_qid, statements) def probe_qid(self, probe): """Find the ix QID for the given probe ID. If this probe is not yet registered in the wikibase then add it. Return the probe QID.""" id = str(probe['id']) # Check if the IX is in the wikibase if id not in self.probeid2qid: # Set properties for this new probe probeid_qualifiers = [ (self.wh.get_pid('instance of'), self.probeid_qid), ] statements = [ (self.wh.get_pid('instance of'), self.atlas_probe_qid), (self.wh.get_pid('external ID'), id, [], probeid_qualifiers) ] # Add this probe to the wikibase probe_qid = self.wh.add_item('add new RIPE Atlas probe', label=f'RIPE Atlas probe #{id}', description=probe['description'], statements=statements) # keep track of this QID self.probeid2qid[id] = probe_qid return self.probeid2qid[id]
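
# Usage sketch. URL and PROBEID_LABEL are referenced above but not defined in
# this excerpt; the endpoint is RIPE Atlas' public probe API, the label is an
# assumption.
PROBEID_LABEL = 'RIPE Atlas probe ID'  # assumed label
URL = 'https://atlas.ripe.net/api/v2/probes/'

if __name__ == '__main__':
    crawler = Crawler()
    crawler.run()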
class Crawler(object): def __init__(self): """ """ # Helper for wiki access self.wh = Wikihandy(preload=True) # Get the QID for Spamhaus organization self.spamhaus_qid = self.wh.get_qid( 'Spamhaus', create={ # Create it if it doesn't exist 'summary': 'add Spamhaus organization', # Commit message 'description': 'The Spamhaus Project is an international organisation to track email spammers and spam-related activity', # Item description 'aliases': 'The Spamhaus Project|the spamhaus project', 'statements': [[ self.wh.get_pid('instance of'), self.wh.get_qid('organization') ]] }) # Get the QID for Spamhaus DROP project self.drop_qid = self.wh.get_qid( 'Spamhaus DROP lists', create={ # Create it if it doesn't exist 'summary': 'add Spamhaus block list', # Commit message 'description': "The Spamhaus Don't Route Or Peer Lists", # Item description 'statements': [[self.wh.get_pid('managed by'), self.spamhaus_qid]] }) # Get the QID for Spamhaus ASN-DROP list self.asn_drop_qid = self.wh.get_qid( 'Spamhaus ASN-DROP list', create={ # Create it if it doesn't exist 'summary': 'add Spamhaus block list', # Commit message 'description': 'ASN-DROP contains a list of Autonomous System Numbers controlled by spammers or cyber criminals, as well as "hijacked" ASNs. ', # Item description 'statements': [[self.wh.get_pid('managed by'), self.spamhaus_qid], [self.wh.get_pid('part of'), self.drop_qid]] }) # Added properties will have this additional information today = self.wh.today() self.reference = [(self.wh.get_pid('source'), self.spamhaus_qid), (self.wh.get_pid('reference URL'), URL), (self.wh.get_pid('point in time'), today)] def run(self): """Fetch blocklist from Spamhaus and push to wikibase. """ req = requests.get(URL) if req.status_code != 200: sys.exit('Error while fetching the blocklist') for i, row in enumerate(req.text.splitlines()): # Skip the header if row.startswith(';'): continue self.update_net(row) sys.stderr.write(f'\rProcessed {i+1} ASes') sys.stderr.write(f'\n') def update_net(self, one_line): """Add the network to wikibase if it's not already there and update its properties.""" asn, _, cc_name = one_line.partition(';') asn = int(asn[2:]) cc, name = [word.strip() for word in cc_name.split('|')] # Properties for this AS statements = [ [ self.wh.get_pid('reported in'), self.asn_drop_qid, self.reference ], [self.wh.get_pid('name'), name, self.reference], ] # set countries if len(cc) == 2: cc_qid = self.wh.country2qid(cc) if cc_qid is not None: statements.append( [self.wh.get_pid('country'), cc_qid, self.reference]) # Commit to wikibase # Get the AS QID (create if AS is not yet registered) and commit changes net_qid = self.wh.asn2qid(asn, create=True) self.wh.upsert_statements('update from Spamhaus ASN DROP list', net_qid, statements)