def pull_group(pouet_id, releaser_id):
    """Fetch one group from the pouet.net v1 API and mirror it locally.

    Upserts the Group record (keyed on pouet_id), mirrors the group's prods
    and their group credits, then re-runs production automatching for the
    given Demozoo releaser.

    NOTE(fix): this definition previously used Python 2's ``urllib2`` (a
    NameError under Python 3, which the rest of this file targets — it uses
    ``nonlocal`` and ``urllib.request``) and direct many-to-many assignment
    ``prod.groups = [...]`` (removed in Django 2.0). Both are corrected here;
    the API URL is also switched to https for consistency with the rest of
    the file.

    :param pouet_id: pouet.net numeric group ID
    :param releaser_id: Demozoo Releaser pk to automatch afterwards
    """
    url = 'https://api.pouet.net/v1/group/?id=%d' % int(pouet_id)
    req = urllib.request.Request(url, None, {'User-Agent': settings.HTTP_USER_AGENT})
    # Context manager guarantees the connection is closed even if the read
    # or the JSON decode raises.
    with urllib.request.urlopen(req) as page:
        response = json.loads(page.read())

    if not response.get('success'):
        logger.warning("pouet.net API request returned non-success! %r" % response)
        return

    logger.info("API request to %s succeeded" % url)
    group_data = response['group']
    group, created = Group.objects.update_or_create(pouet_id=pouet_id, defaults={
        'name': group_data['name'],
        'demozoo_id': group_data['demozoo'],
        'last_seen_at': datetime.datetime.now(),
    })

    # Cache of pouet group id -> Group, shared with fetch_group so each
    # credited group is fetched at most once per call.
    groups_by_id = {int(pouet_id): group}

    if 'prods' in group_data:
        for prod_data in group_data['prods']:
            prod, created = Production.objects.update_or_create(pouet_id=prod_data['id'], defaults={
                'name': prod_data['name'],
                'last_seen_at': datetime.datetime.now(),
            })
            prod.groups.set([fetch_group(g, groups_by_id) for g in prod_data['groups']])

    automatch_productions(Releaser.objects.get(id=releaser_id))
def pull_group(pouet_id, releaser_id):
    """Fetch one group from the pouet.net v1 API and mirror it locally.

    Upserts the Group record (keyed on pouet_id), mirrors the group's prods
    and their group credits, then re-runs production automatching for the
    given Demozoo releaser.

    :param pouet_id: pouet.net numeric group ID
    :param releaser_id: Demozoo Releaser pk to automatch afterwards
    """
    url = 'https://api.pouet.net/v1/group/?id=%d' % int(pouet_id)
    req = urllib.request.Request(url, None, {'User-Agent': settings.HTTP_USER_AGENT})
    # Use the response as a context manager so the connection is closed even
    # if the read or JSON decode raises (previously page.close() was skipped
    # on error, leaking the socket).
    with urllib.request.urlopen(req) as page:
        response = json.loads(page.read())

    if not response.get('success'):
        logger.warning("pouet.net API request returned non-success! %r" % response)
        return

    logger.info("API request to %s succeeded" % url)
    group_data = response['group']
    group, created = Group.objects.update_or_create(pouet_id=pouet_id, defaults={
        'name': group_data['name'],
        'demozoo_id': group_data['demozoo'],
        'last_seen_at': datetime.datetime.now(),
    })

    # Cache of pouet group id -> Group, shared with fetch_group so each
    # credited group is fetched at most once per call.
    groups_by_id = {int(pouet_id): group}

    if 'prods' in group_data:
        for prod_data in group_data['prods']:
            prod, created = Production.objects.update_or_create(pouet_id=prod_data['id'], defaults={
                'name': prod_data['name'],
                'last_seen_at': datetime.datetime.now(),
            })
            prod.groups.set([fetch_group(g, groups_by_id) for g in prod_data['groups']])

    automatch_productions(Releaser.objects.get(id=releaser_id))
def automatch_group(releaser_id):
    """Re-run pouet production automatching for a single Demozoo releaser.

    :param releaser_id: pk of the Releaser to automatch
    """
    releaser = Releaser.objects.get(id=releaser_id)
    automatch_productions(releaser)
def handle(self, *args, **kwargs):
    """Import the weekly pouet.net data dumps and refresh local mirrors.

    Steps:
    1. Stream-parse the gzipped groups dump, upserting Group records
       keyed on pouet_id.
    2. Stream-parse the prods dump, upserting Production records and
       syncing each prod's group credits and download links.
    3. Garbage-collect Production / Group rows not seen for 30 days
       (assumed deleted from Pouet).
    4. Re-run automatching for every releaser with a PouetGroup link.

    Fixes over the previous version: the download-link loop no longer
    clobbers the prod-level ``created`` flag (which made ``prods_created``
    count whether the *last download link* was new rather than the prod),
    and missing dump files now fail loudly via ``raise_for_status()``
    instead of letting GzipFile choke on an HTML error body.
    """
    verbose = kwargs['verbosity'] >= 1

    # Dumps are published every Wednesday morning, so find out when last Wednesday was
    today = datetime.date.today()
    days_since_wednesday = (today.weekday() - 2) % 7
    wednesday = today - datetime.timedelta(days=days_since_wednesday)
    datestamp = wednesday.strftime('%Y%m%d')
    monthstamp = wednesday.strftime('%Y%m')

    if verbose:
        print("importing groups...")

    groups_url = "https://data.pouet.net/dumps/%s/pouetdatadump-groups-%s.json.gz" % (monthstamp, datestamp)
    r = requests.get(groups_url, stream=True)
    r.raise_for_status()  # fail fast if this week's dump isn't published yet
    groups_file = gzip.GzipFile(fileobj=r.raw)

    groups_imported = 0
    groups_created = 0
    group_db_ids = {}  # pouet group id -> Group pk, used to link prods below

    def handle_group(group_data):
        # Called by json.load as object_hook for every JSON object in the
        # stream; only objects carrying an 'id' key are group entries.
        nonlocal groups_imported, groups_created
        if 'id' in group_data:
            group, created = Group.objects.update_or_create(pouet_id=group_data['id'], defaults={
                'name': group_data['name'],
                'demozoo_id': group_data['demozoo'],
                'last_seen_at': datetime.datetime.now(),
            })
            group_db_ids[group_data['id']] = group.id
            groups_imported += 1
            if groups_imported % 1000 == 0 and verbose:  # pragma: no cover
                print("%d groups imported" % groups_imported)
            if created:
                groups_created += 1

    # We parse for side effects only; json.load's return value is discarded.
    json.load(groups_file, object_hook=handle_group)
    groups_file.close()

    if verbose:
        print("done. %d groups imported, of which %d newly created" % (groups_imported, groups_created))
    if verbose:
        print("importing prods...")

    prods_url = "https://data.pouet.net/dumps/%s/pouetdatadump-prods-%s.json.gz" % (monthstamp, datestamp)
    r = requests.get(prods_url, stream=True)
    r.raise_for_status()  # fail fast if this week's dump isn't published yet
    prods_file = gzip.GzipFile(fileobj=r.raw)

    prods_imported = 0
    prods_created = 0

    def handle_prod(prod_data):
        # group_db_ids is only mutated, never rebound, so it needs no nonlocal.
        nonlocal prods_imported, prods_created
        # prods JSON contains various nested objects, but only prod entries have a 'download' field
        if 'download' in prod_data:
            prod, prod_created = Production.objects.update_or_create(pouet_id=prod_data['id'], defaults={
                'name': prod_data['name'],
                'last_seen_at': datetime.datetime.now(),
            })
            prod.groups.set([
                group_db_ids[group['id']]
                for group in prod_data['groups']
            ])

            # Sync download links: anything stored locally but absent from
            # the dump gets deleted afterwards.
            if prod_created:
                unseen_download_link_ids = set()
            else:
                unseen_download_link_ids = set(prod.download_links.values_list('id', flat=True))
            for link_data in prod_data['downloadLinks']:
                link, link_created = prod.download_links.get_or_create(
                    url=link_data['link'],
                    link_type=link_data['type']
                )
                if not link_created:
                    unseen_download_link_ids.discard(link.id)
            if unseen_download_link_ids:
                prod.download_links.filter(id__in=unseen_download_link_ids).delete()

            prods_imported += 1
            if prods_imported % 1000 == 0 and verbose:  # pragma: no cover
                print("%d prods imported" % prods_imported)
            if prod_created:
                prods_created += 1
        # Return the dict unchanged: object_hook replaces each parsed object
        # with its return value, so enclosing prods must still see their
        # nested 'groups' / 'downloadLinks' dicts intact.
        return prod_data

    json.load(prods_file, object_hook=handle_prod)
    prods_file.close()

    if verbose:
        print("done. %d prods imported, of which %d newly created" % (prods_imported, prods_created))

    # garbage-collect productions / groups that haven't been seen for 30 days
    # (i.e. have been deleted from Pouet)
    last_month = datetime.datetime.now() - datetime.timedelta(days=30)
    Production.objects.filter(last_seen_at__lt=last_month).delete()
    Group.objects.filter(last_seen_at__lt=last_month).delete()

    if verbose:
        print("automatching prods...")
    pouetable_prod_types = get_pouetable_prod_types()
    for i, releaser in enumerate(Releaser.objects.filter(external_links__link_class='PouetGroup').only('id')):
        automatch_productions(releaser, pouetable_prod_types=pouetable_prod_types)
        if i % 10 == 0 and i != 0:  # pragma: no cover
            if verbose:
                print("%d releasers automatched" % i)
            # Be gentle with the database / external lookups between batches.
            time.sleep(2)