Beispiel #1
0
def fetch_new_sceneorg_files(days=1):
	"""Import files reported by the scene.org "latest files" API.

	Requests the latest-files endpoint for the given number of days and
	follows each response's 'nextPageURL' until none is returned, pausing
	one second between page requests. For every file listed, the Directory
	records along its path and the File record itself are created, or
	refreshed (last_seen_at updated, is_deleted cleared) if they already
	exist. If at least one new File record was created,
	find_sceneorg_results_files() is called afterwards.

	Processing stops at the first response whose 'success' field is falsy.

	days -- value substituted into the 'days' query parameter of the API URL
	"""
	url = "https://files.scene.org/api/adhoc/latest-files/?days=%d" % days

	new_file_count = 0

	while True:
		req = urllib2.Request(url, None, {'User-Agent': user_agent})
		page = urllib2.urlopen(req)
		try:
			response = json.loads(page.read())
		finally:
			# release the connection even if reading or JSON parsing fails
			page.close()

		if not response.get('success'):
			break

		for item in response['files']:
			# fullPath starts with '/', so the first split element is empty
			path_components = item['fullPath'].split('/')[1:]
			dirs = path_components[:-1]

			path = '/'
			# get_or_create returns an (object, created) tuple; only the
			# Directory instance may be used as a parent / directory below
			current_dir, _created = Directory.objects.get_or_create(
				path='/', defaults={'last_seen_at': datetime.datetime.now()})

			# walk down the path, touching or creating each directory level
			for d in dirs:
				last_dir = current_dir
				path += d + '/'

				try:
					current_dir = Directory.objects.get(path=path)
					current_dir.last_seen_at = datetime.datetime.now()
					current_dir.is_deleted = False
					current_dir.save()
				except Directory.DoesNotExist:
					current_dir = Directory.objects.create(
						path=path, last_seen_at=datetime.datetime.now(), parent=last_dir)

			path += path_components[-1]

			try:
				f = File.objects.get(path=path)
				f.last_seen_at = datetime.datetime.now()
				f.is_deleted = False
				f.size = item['size']
				f.save()
			except File.DoesNotExist:
				File.objects.create(
					path=path, last_seen_at=datetime.datetime.now(), directory=current_dir,
					size=item['size'])
				new_file_count += 1

		url = response.get('nextPageURL')
		if url:
			# be polite to the API between page requests
			time.sleep(1)
		else:
			break

	if new_file_count > 0:
		find_sceneorg_results_files()
Beispiel #2
0
def scan_dir_listing():
	"""Synchronise Directory records with the output of parse_all_dirs().

	For each (path, entries) pair yielded by parse_all_dirs(), the matching
	Directory record is looked up by path (and created with the current
	time as last_seen_at if missing), then handed to update_dir_records()
	together with its entries. The values returned by update_dir_records()
	are summed; if the total is positive, find_sceneorg_results_files() is
	called.
	"""
	total_new_files = 0

	for dir_path, dir_entries in parse_all_dirs():
		try:
			directory = Directory.objects.get(path=dir_path)
		except Directory.DoesNotExist:
			directory = Directory.objects.create(
				path=dir_path,
				last_seen_at=datetime.datetime.now())

		# presumably the number of newly added files - verify against update_dir_records
		total_new_files += update_dir_records(directory, dir_entries)

	if total_new_files > 0:
		find_sceneorg_results_files()
Beispiel #3
0
def fetch_new_sceneorg_files(days=1):
    """Import files reported by the scene.org "latest files" API.

    Requests the latest-files endpoint for the given number of days and
    follows each response's 'nextPageURL' until none is returned, pausing
    one second between page requests. For every file listed, the Directory
    records along its path and the File record itself are created, or
    refreshed (last_seen_at updated, is_deleted cleared) if they already
    exist. If at least one new File record was created,
    find_sceneorg_results_files() is called afterwards.

    Processing stops (with a logged warning) at the first response whose
    'success' field is falsy.

    days -- value substituted into the 'days' query parameter of the API URL
    """
    url = "https://files.scene.org/api/adhoc/latest-files/?days=%d" % days

    new_file_count = 0

    while True:
        req = urllib.request.Request(url, None,
                                     {'User-Agent': settings.HTTP_USER_AGENT})
        # the context manager closes the response even if reading or JSON
        # parsing raises
        with urllib.request.urlopen(req) as page:
            response = json.loads(page.read())

        if not response.get('success'):
            logger.warning("scene.org API request returned non-success! %r" %
                           response)
            break

        logger.info("API request to %s succeeded - %d files returned" %
                    (url, len(response['files'])))

        for item in response['files']:
            # the fullPath field in the API consists of a byte string (de facto utf-8) interpreted
            # as windows-1252 and served to us as a Unicode string.
            # Here we encode as windows-1252 (to reconstruct the original bytestream as closely as
            # possible), then decode the bytestream as iso-8859-1 to embed that bytestream into
            # a unicode string that we can process and ultimately insert into the db.
            full_path = item['fullPath'].encode('Windows-1252',
                                                'ignore').decode('iso-8859-1')
            # full_path starts with '/', so the first split element is empty
            path_components = full_path.split('/')[1:]
            dirs = path_components[:-1]

            path = '/'
            current_dir, created = Directory.objects.get_or_create(
                path='/', defaults={'last_seen_at': datetime.datetime.now()})

            # walk down the path, touching or creating each directory level
            for d in dirs:
                last_dir = current_dir
                path += d + '/'

                try:
                    current_dir = Directory.objects.get(path=path)
                    current_dir.last_seen_at = datetime.datetime.now()
                    current_dir.is_deleted = False
                    current_dir.save()
                except Directory.DoesNotExist:
                    current_dir = Directory.objects.create(
                        path=path,
                        last_seen_at=datetime.datetime.now(),
                        parent=last_dir)

            path += path_components[-1]

            try:
                f = File.objects.get(path=path)
                f.last_seen_at = datetime.datetime.now()
                f.is_deleted = False
                f.size = item['size']
                f.save()
            except File.DoesNotExist:
                logger.info("New file found: %s" % path)
                File.objects.create(path=path,
                                    last_seen_at=datetime.datetime.now(),
                                    directory=current_dir,
                                    size=item['size'])
                new_file_count += 1

        url = response.get('nextPageURL')
        if url:
            # be polite to the API between page requests
            time.sleep(1)
        else:
            pointless_call_to_make_coverage_notice_this_line()
            break

    if new_file_count > 0:
        find_sceneorg_results_files()
Beispiel #4
0
    def handle(self, *args, **kwargs):
        """Run find_sceneorg_results_files(), printing a line per reported party.

        The positional and keyword arguments are accepted but not used.
        """
        def report_found(party):
            # presumably invoked once per party whose results file is found -
            # verify against find_sceneorg_results_files
            message = "found results.txt for %s" % party
            print(message)

        find_sceneorg_results_files(report_found)
    def handle_noargs(self, **options):
        """Run find_sceneorg_results_files(), printing a line per reported party.

        The keyword options are accepted but not used.
        """
        def callback(party):
            # Parenthesised single-argument form: prints the same text under
            # the Python 2 print statement and is valid Python 3 syntax.
            print("found results.txt for %s" % party)

        find_sceneorg_results_files(callback)
Beispiel #6
0
def fetch_new_sceneorg_files(days=1):
	"""Import files reported by the scene.org "latest files" API.

	Requests the latest-files endpoint for the given number of days and
	follows each response's 'nextPageURL' until none is returned, pausing
	one second between page requests. For every file listed, the Directory
	records along its path and the File record itself are created, or
	refreshed (last_seen_at updated, is_deleted cleared) if they already
	exist. If at least one new File record was created,
	find_sceneorg_results_files() is called afterwards.

	Processing stops (with a logged warning) at the first response whose
	'success' field is falsy.

	days -- value substituted into the 'days' query parameter of the API URL
	"""
	url = "https://files.scene.org/api/adhoc/latest-files/?days=%d" % days

	new_file_count = 0

	while True:
		req = urllib2.Request(url, None, {'User-Agent': settings.HTTP_USER_AGENT})
		page = urllib2.urlopen(req)
		try:
			response = json.loads(page.read())
		finally:
			# release the connection even if reading or JSON parsing fails
			page.close()

		if not response.get('success'):
			logger.warning("scene.org API request returned non-success! %r" % response)
			break

		logger.info("API request to %s succeeded - %d files returned" % (url, len(response['files'])))

		for item in response['files']:
			# the fullPath field in the API consists of a byte string (de facto utf-8) interpreted
			# as windows-1252 and served to us as a Unicode string.
			# Here we encode as windows-1252 (to reconstruct the original bytestream as closely as
			# possible), then decode the bytestream as iso-8859-1 to embed that bytestream into
			# a unicode string that we can process and ultimately insert into the db.
			full_path = item['fullPath'].encode('Windows-1252', 'ignore').decode('iso-8859-1')
			# full_path starts with '/', so the first split element is empty
			path_components = full_path.split('/')[1:]
			dirs = path_components[:-1]

			path = '/'
			# get_or_create returns an (object, created) tuple; only the
			# Directory instance may be used as a parent / directory below
			current_dir, _created = Directory.objects.get_or_create(
				path='/', defaults={'last_seen_at': datetime.datetime.now()})

			# walk down the path, touching or creating each directory level
			for d in dirs:
				last_dir = current_dir
				path += d + '/'

				try:
					current_dir = Directory.objects.get(path=path)
					current_dir.last_seen_at = datetime.datetime.now()
					current_dir.is_deleted = False
					current_dir.save()
				except Directory.DoesNotExist:
					current_dir = Directory.objects.create(
						path=path, last_seen_at=datetime.datetime.now(), parent=last_dir)

			path += path_components[-1]

			try:
				f = File.objects.get(path=path)
				f.last_seen_at = datetime.datetime.now()
				f.is_deleted = False
				f.size = item['size']
				f.save()
			except File.DoesNotExist:
				logger.info("New file found: %s" % path)
				File.objects.create(
					path=path, last_seen_at=datetime.datetime.now(), directory=current_dir,
					size=item['size'])
				new_file_count += 1

		url = response.get('nextPageURL')
		if url:
			# be polite to the API between page requests
			time.sleep(1)
		else:
			break

	if new_file_count > 0:
		find_sceneorg_results_files()
	def handle_noargs(self, **options):
		"""Run find_sceneorg_results_files(), printing a line per reported party.

		The keyword options are accepted but not used.
		"""
		def callback(party):
			# Parenthesised single-argument form: prints the same text under
			# the Python 2 print statement and is valid Python 3 syntax.
			print("found results.txt for %s" % party)

		find_sceneorg_results_files(callback)
Beispiel #8
0
def fetch_new_sceneorg_files(path, days=1):
	"""Fetch a scene.org directory and scan for results files if anything is new.

	Calls fetch_sceneorg_dir() for the given path and days with its 'async'
	option set to False, and calls find_sceneorg_results_files() when the
	returned count is greater than zero.

	path -- passed through to fetch_sceneorg_dir
	days -- passed through to fetch_sceneorg_dir
	"""
	# 'async' is a reserved keyword from Python 3.7 on, so writing async=False
	# is a syntax error there; dict unpacking passes the same keyword argument
	# on both Python 2 and Python 3.
	new_file_count = fetch_sceneorg_dir(path=path, days=days, **{'async': False})

	if new_file_count > 0:
		find_sceneorg_results_files()
Beispiel #9
0
def fetch_new_sceneorg_files(path, days=1):
    """Fetch a scene.org directory and scan for results files if anything is new.

    Calls fetch_sceneorg_dir() for the given path and days with its 'async'
    option set to False, and calls find_sceneorg_results_files() when the
    returned count is greater than zero.

    path -- passed through to fetch_sceneorg_dir
    days -- passed through to fetch_sceneorg_dir
    """
    # 'async' is a reserved keyword from Python 3.7 on, so writing async=False
    # is a syntax error there; dict unpacking passes the same keyword argument
    # on both Python 2 and Python 3.
    new_file_count = fetch_sceneorg_dir(path=path, days=days, **{'async': False})

    if new_file_count > 0:
        find_sceneorg_results_files()