Code example #1
    def handle(self, *args, **kwargs):
        self.verbose = (kwargs['verbosity'] > 1)

        date = datetime.date(kwargs['year'], kwargs['month'], 1)
        datestamp = date.strftime('%Y_%m')

        source_url = self.url_for_date(date)

        if source_url is None:
            raise CommandError('Could not find any data for %s' % datestamp)

        target_dir = os.path.join(
            settings.PIPELINE_DATA_BASEDIR,
            'patient_list_size',
            datestamp,
        )

        target_file = os.path.join(target_dir, 'patient_list_size_new.csv')

        mkdir_p(target_dir)

        if self.verbose:
            print('Getting data for {}'.format(datestamp))

        self.get_data(target_file, source_url)

        if self.verbose:
            print("Done")
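
All of these commands call a mkdir_p helper before writing files, rather than using os.makedirs directly. A minimal sketch of such a helper, assuming it simply mirrors mkdir -p semantics; the project's actual implementation is not shown here:

import errno
import os


def mkdir_p(path):
    # Assumed behaviour: create the directory and any missing parents,
    # and succeed silently if it already exists (like `mkdir -p`).
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise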
Code example #2
    def handle(self, *args, **kwargs):
        self.verbose = kwargs["verbosity"] > 1

        date = datetime.date(kwargs["year"], kwargs["month"], 1)
        datestamp = date.strftime("%Y_%m")

        url = date.strftime(
            "https://digital.nhs.uk/data-and-information/publications/statistical/patients-registered-at-a-gp-practice/%B-%Y"
        ).lower()

        rsp = requests.get(url)

        if rsp.status_code != 200:
            raise CommandError("Could not find any data for %s" % datestamp)

        filename = "gp-reg-pat-prac-quin-age.csv"
        tree = html.fromstring(rsp.content)
        source_url = tree.xpath(
            "//a[contains(@href, '{}')]/@href".format(filename))[0]

        target_dir = os.path.join(settings.PIPELINE_DATA_BASEDIR,
                                  "patient_list_size", datestamp)

        target_file = os.path.join(target_dir, "patient_list_size_new.csv")

        mkdir_p(target_dir)

        if self.verbose:
            print("Getting data for {}".format(datestamp))

        self.curl_and_return(source_url, target_file)

        if self.verbose:
            print("Done")
Code example #3
    def setUpTestData(cls):
        for bnf_code, name in [
            ("0203020C0AAAAAA", "Adenosine_I/V Inf 3mg/ml 2ml Vl"),
            ("1003020U0AAAIAI", "Diclofenac Sod_Gel 2.32%"),
            ("1003020U0BBADAI", "Voltarol 12 Hour Emulgel P_Gel 2.32%"),
            ("1305020C0AAFVFV", "Coal Tar 10%/Salic Acid 5%/Aq_Crm"),
            ("1106000X0AAAIAI", "Piloc HCl_Eye Dps 6%"),
            ("090402000BBHCA0", "Nutrison Pack_Stnd"),
        ]:
            Presentation.objects.create(bnf_code=bnf_code, name=name)

        shutil.copytree(
            "dmd2/tests/data/dmd/1",
            "pipeline/test-data/data/dmd/2019_07_01/nhsbsa_dmd_7.4.0_20190701000001",
        )

        mkdir_p("pipeline/test-data/data/snomed_mapping/2019_07_01")

        shutil.copyfile(
            "dmd2/tests/data/bnf_code_mapping/mapping.xlsx",
            "pipeline/test-data/data/snomed_mapping/2019_07_01/mapping.xlsx",
        )

        # Import the data.  See dmd2/tests/data/README.txt for details of what
        # objects will be created.
        with patch("gcutils.bigquery.Client.upload_model"):
            call_command("import_dmd2")

        # Copy another, later, dataset into the data directory, for tests that
        # call the command again.
        shutil.copytree(
            "dmd2/tests/data/dmd/2",
            "pipeline/test-data/data/dmd/2019_07_08/nhsbsa_dmd_7.4.0_20190708000001",
        )
Code example #4
    def handle(self, *args, **kwargs):
        self.verbose = (kwargs['verbosity'] > 1)

        date = datetime.date(kwargs['year'], kwargs['month'], 1)
        datestamp = date.strftime('%Y_%m')

        url = date.strftime('http://digital.nhs.uk/pubs/numpatgp%b%y').lower()

        rsp = requests.get(url)

        if rsp.status_code != 200:
            raise CommandError('Could not find any data for %s' % datestamp)

        filename = date.strftime('gp-reg-pat-prac-quin-age-%b-%y').lower()
        tree = html.fromstring(rsp.content)
        source_url = tree.xpath(
            "//a[contains(@href, '{}')]/@href".format(filename))[0]

        target_dir = os.path.join(
            settings.PIPELINE_DATA_BASEDIR,
            'patient_list_size',
            datestamp,
        )

        target_file = os.path.join(target_dir, 'patient_list_size_new.csv')

        mkdir_p(target_dir)

        if self.verbose:
            print('Getting data for {}'.format(datestamp))

        self.curl_and_return(source_url, target_file)

        if self.verbose:
            print("Done")
Code example #5
    def handle(self, year, month, **kwargs):
        rsp = requests.get(
            "https://opendata.nhsbsa.net/api/3/action/package_show?id=english-prescribing-data-epd"
        )
        resources = rsp.json()["result"]["resources"]
        urls = [
            r["url"] for r in resources
            if r["name"] == "EPD_{year}{month:02d}".format(year=year,
                                                           month=month)
        ]
        assert len(urls) == 1, urls
        rsp = requests.get(urls[0], stream=True)
        assert rsp.ok

        dir_path = os.path.join(
            settings.PIPELINE_DATA_BASEDIR,
            "prescribing_v2",
            "{year}_{month:02d}".format(year=year, month=month),
        )
        mkdir_p(dir_path)
        filename = "epd_{year}{month:02d}.csv".format(year=year, month=month)

        with open(os.path.join(dir_path, filename), "wb") as f:
            for block in rsp.iter_content(32 * 1024):
                f.write(block)
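
For handle(self, year, month, **kwargs) to receive year and month, the command has to declare them in add_arguments. A minimal sketch, assuming plain positional integer arguments; the real definitions are not shown in the source:

    def add_arguments(self, parser):
        # Assumed argument definitions; the actual command may declare these
        # differently (for example as optional flags with defaults).
        parser.add_argument("year", type=int)
        parser.add_argument("month", type=int)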
Code example #6
    def write_logs(self):
        """Record summary and details of oddities we've found in the data.

        We log (summary and details) the following things:
         * dm+d objects only present in mapping
         * VMPs with inferred BNF codes
         * VMPs without BNF codes
         * BNF codes with multiple dm+d objects
         * BNF codes with multiple dm+d objects where a name cannot be
           inferred
         * VMPPs that have different BNF code to their VMP
         * AMPPs that have different BNF code to their AMP

        We also log summaries of the number of objects imported.
        """

        mkdir_p(self.logs_path)

        for key in self.log_keys:
            with open(os.path.join(self.logs_path, key + ".csv"), "w") as f:
                writer = csv.writer(f)
                writer.writerows(self.logs[key])

        with open(os.path.join(self.logs_path, "summary.csv"), "w") as f:
            writer = csv.writer(f)
            for model in [VMP, AMP, VMPP, AMPP]:
                writer.writerow([model.__name__, model.objects.count()])
            for key in self.log_keys:
                writer.writerow([key, len(self.logs[key])])
Code example #7
    def handle(self, *args, **kwargs):
        path = os.path.join(settings.PIPELINE_DATA_BASEDIR, "bnf_codes")
        year_and_month = datetime.date.today().strftime("%Y_%m")
        dir_path = os.path.join(path, year_and_month)
        mkdir_p(dir_path)
        zip_path = os.path.join(dir_path, "download.zip")

        base_url = "https://applications.nhsbsa.nhs.uk/infosystems/data/"

        session = requests.Session()
        session.cookies["JSESSIONID"] = kwargs["jsessionid"]

        url = base_url + "showDataSelector.do"
        params = {"reportId": "126"}
        rsp = session.get(url, params=params)

        tree = html.fromstring(rsp.content)
        options = tree.xpath('//select[@id="bnfVersion"]/option')

        year_to_bnf_version = {}
        for option in options:
            datestamp, version = option.text.split(" : ")
            date = datetime.datetime.strptime(datestamp, "%d-%m-%Y")
            year_to_bnf_version[date.year] = version

        year = max(year_to_bnf_version)
        version = year_to_bnf_version[year]

        url = base_url + "requestSelectedDownload.do"
        params = {
            "bnfVersion": version,
            "filePath": "",
            "dataView": "260",
            "format": "",
            "defaultReportIdDataSel": "",
            "reportId": "126",
            "action": "checkForAvailableDownload",
        }
        rsp = session.get(url, params=params)

        request_id = rsp.json()["requestNo"]

        url = base_url + "downloadAvailableReport.zip"
        params = {"requestId": request_id}
        rsp = session.post(url, params=params)
        with open(zip_path, "wb") as f:
            f.write(rsp.content)

        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(dir_path)

        csv_paths = glob.glob(os.path.join(dir_path, "*.csv"))

        assert len(csv_paths) == 1

        os.rename(csv_paths[0], os.path.join(dir_path, "bnf_codes.csv"))
Code example #8
    def handle(self, *args, **kwargs):
        # The page lists available downloads.  The data is stored in a JSON
        # object.
        url = "https://data.gov.uk/dataset/176ae264-2484-4afe-a297-d51798eb8228/gp-practice-prescribing-data-presentation-level"
        rsp = requests.get(url)
        doc = BeautifulSoup(rsp.content, "html.parser")
        tag = doc.find("script", type="application/ld+json")
        metadata = json.loads(list(tag.descendants)[0])

        filename_fragment = {
            "addresses": "ADDR%20BNFT",
            "chemicals": "CHEM%20SUBS"
        }[kwargs["dataset"]]
        pattern = r"T(\d{4})(\d{2})" + filename_fragment + r"\.CSV"

        urls = [
            record["contentUrl"] for record in metadata["distribution"]
            if filename_fragment in record["contentUrl"]
        ]

        # Iterate over the URLs, newest first, downloading as we go, and
        # stopping once we find a URL that we have already downloaded.
        for url in sorted(urls,
                          key=lambda url: url.split("/")[-1],
                          reverse=True):
            filename = url.split("/")[-1]
            tmp_filename = filename + ".tmp"
            # We ignore case here, as sometimes filename is .csv and sometimes .CSV.
            match = re.match(pattern, filename, re.I)
            year_and_month = "_".join(match.groups())

            dir_path = os.path.join(settings.PIPELINE_DATA_BASEDIR,
                                    "prescribing_metadata", year_and_month)

            if os.path.exists(os.path.join(dir_path, filename)):
                break

            # Older versions of the data have slightly different filenames.
            if os.path.exists(
                    os.path.join(dir_path, filename.replace("%20", "+"))):
                break

            mkdir_p(dir_path)

            rsp = requests.get(url)
            assert rsp.ok

            # Since we check for the presence of the file to determine whether
            # this data has already been fetched, we write to a temporary file
            # and then rename it.
            with open(os.path.join(dir_path, tmp_filename), "w") as f:
                f.write(rsp.text)

            os.rename(os.path.join(dir_path, tmp_filename),
                      os.path.join(dir_path, filename))
Code example #9
    def handle(self, *args, **kwargs):
        # The page lists available downloads.  The data is stored in a JSON
        # object.
        url = 'https://data.gov.uk/dataset/176ae264-2484-4afe-a297-d51798eb8228/gp-practice-prescribing-data-presentation-level'
        rsp = requests.get(url)
        doc = BeautifulSoup(rsp.content, 'html.parser')
        tag = doc.find('script', type='application/ld+json')
        metadata = json.loads(tag.text)

        filename_fragment = {
            'addresses': 'ADDR%20BNFT',
            'chemicals': 'CHEM%20SUBS',
        }[kwargs['dataset']]
        pattern = r'T(\d{4})(\d{2})' + filename_fragment + r'\.CSV'

        urls = [
            record['contentUrl'] for record in metadata['distribution']
            if filename_fragment in record['contentUrl']
        ]

        # Iterate over the URLs, newest first, downloading as we go, and
        # stopping once we find a URL that we have already downloaded.
        for url in sorted(urls,
                          key=lambda url: url.split('/')[-1],
                          reverse=True):
            filename = url.split('/')[-1]
            tmp_filename = filename + '.tmp'
            # We ignore case here, as sometimes filename is .csv and sometimes .CSV.
            match = re.match(pattern, filename, re.I)
            year_and_month = '_'.join(match.groups())

            dir_path = os.path.join(settings.PIPELINE_DATA_BASEDIR,
                                    'prescribing_metadata', year_and_month)

            if os.path.exists(os.path.join(dir_path, filename)):
                break

            # Older versions of the data have slightly different filenames.
            if os.path.exists(
                    os.path.join(dir_path, filename.replace('%20', '+'))):
                break

            mkdir_p(dir_path)

            rsp = requests.get(url)
            assert rsp.ok

            # Since we check for the presence of the file to determine whether
            # this data has already been fetched, we write to a temporary file
            # and then rename it.
            with open(os.path.join(dir_path, tmp_filename), 'wb') as f:
                f.write(rsp.content)

            os.rename(os.path.join(dir_path, tmp_filename),
                      os.path.join(dir_path, filename))
Code example #10
    def handle(self, *args, **kwargs):
        base_url = "https://isd.digital.nhs.uk/"
        session = requests.Session()

        login_url = base_url + "trud3/security/j_spring_security_check"
        params = {
            "j_username": settings.TRUD_USERNAME,
            "j_password": settings.TRUD_PASSWORD,
            "commit": "LOG+IN",
        }
        rsp = session.post(login_url, params)

        index_url = (
            base_url +
            "trud3/user/authenticated/group/0/pack/6/subpack/24/releases")
        rsp = session.get(index_url)

        doc = BeautifulSoup(rsp.text, "html.parser")
        latest_release_div = doc.find("div", class_="release")
        p = latest_release_div.find_all("p")[1]
        text = " ".join(p.text.splitlines()).strip()
        release_date = datetime.strptime(
            text, "Released on %A, %d %B %Y").strftime("%Y_%m_%d")
        download_href = latest_release_div.find(
            "a", class_="download-release")["href"]
        filename = download_href.split("/")[-1]

        dir_path = os.path.join(settings.PIPELINE_DATA_BASEDIR, "dmd",
                                release_date)
        zip_path = os.path.join(dir_path, filename)
        unzip_dir_path = os.path.join(dir_path, os.path.splitext(filename)[0])

        if os.path.exists(zip_path):
            return

        rsp = session.get(base_url + download_href, stream=True)

        mkdir_p(dir_path)

        with open(zip_path, "wb") as f:
            for block in rsp.iter_content(32 * 1024):
                f.write(block)

        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(unzip_dir_path)

        for nested_zip_path in glob.glob(os.path.join(unzip_dir_path,
                                                      "*.zip")):
            with zipfile.ZipFile(nested_zip_path) as zf:
                zf.extractall(unzip_dir_path)
Code example #11
    def handle(self, *args, **kwargs):
        today = datetime.date.today()
        year = today.year
        month = today.month

        num_missing_months = 0
        filename_fragment = {
            'addresses': 'ADDR+BNFT',
            'chemicals': 'CHEM+SUBS',
        }[kwargs['dataset']]

        while True:
            date = datetime.date(year, month, 1)
            year_and_month = date.strftime('%Y_%m')  # eg 2017_01

            dir_path = os.path.join(
                settings.PIPELINE_DATA_BASEDIR,
                'prescribing_metadata',
                year_and_month
            )
            filename = date.strftime('T%Y%m{}.CSV').format(filename_fragment)
            file_path = os.path.join(dir_path, filename)

            if os.path.exists(file_path):
                break

            mkdir_p(dir_path)

            # eg http://datagov.ic.nhs.uk/presentation/2017_08_August/T201708ADDR+BNFT.CSV
            base_url = 'http://datagov.ic.nhs.uk/presentation'
            path_fragment = date.strftime('%Y_%m_%B')
            url = '{}/{}/{}'.format(base_url, path_fragment, filename)

            rsp = requests.get(url)

            if rsp.ok:
                with open(file_path, 'wb') as f:
                    f.write(rsp.content)
            else:
                num_missing_months += 1
                if num_missing_months >= 6:
                    raise CommandError('No data for six months!')

            if month == 1:
                year -= 1
                month = 12
            else:
                month -= 1
Code example #12
    def handle(self, year, month, **kwargs):
        year_and_month = "{year}_{month:02d}".format(year=year, month=month)
        filename = "EPD_{year}{month:02d}.csv".format(year=year, month=month)

        dir_path = os.path.join(settings.PIPELINE_DATA_BASEDIR,
                                "prescribing_v2", year_and_month)
        csv_path = os.path.join(dir_path, filename)
        mkdir_p(dir_path)

        url = "https://storage.googleapis.com/datopian-nhs/csv/" + filename
        rsp = requests.get(url, stream=True)
        assert rsp.ok

        with open(csv_path, "wb") as f:
            for block in rsp.iter_content(32 * 1024):
                f.write(block)
Code example #13
    def download_csv(self, session, year_and_month, period_id):
        dir_path = os.path.join(self.path, year_and_month)
        zip_path = os.path.join(dir_path, 'download.zip')

        url = self.base_url + 'requestSelectedDownload.do'
        params = {
            'period': period_id,
            'filePath': '',
            'dataView': '255',
            'format': '',
            'periodType': 'MONTHLY',
            'defaultPeriod': '200',
            'defaultFilterType': 'MONTHLY',
            'organisation': '11',
            'dimensionHierarchyId': '1',
            'bnfChapter': '0',
            'defaultReportIdDataSel': '',
            'reportId': '124',
            'action': 'checkForAvailableDownload',
        }

        rsp = session.get(url, params=params)
        request_id = rsp.json()['requestNo']

        mkdir_p(dir_path)

        url = self.base_url + 'downloadAvailableReport.zip'
        params = {
            'requestId': request_id,
        }

        rsp = session.post(url, params=params, stream=True)

        total_size = int(rsp.headers['content-length'])

        progress_bar = tqdm(total=total_size, unit='B', unit_scale=True)

        with open(zip_path, 'wb') as f:
            for block in rsp.iter_content(32 * 1024):
                f.write(block)
                progress_bar.update(len(block))

        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(dir_path)
Code example #14
    def download_csv(self, session, year_and_month, period_id):
        dir_path = os.path.join(self.path, year_and_month)
        zip_path = os.path.join(dir_path, "download.zip")

        url = self.base_url + "requestSelectedDownload.do"
        params = {
            "period": period_id,
            "filePath": "",
            "dataView": "255",
            "format": "",
            "periodType": "MONTHLY",
            "defaultPeriod": "200",
            "defaultFilterType": "MONTHLY",
            "organisation": "11",
            "dimensionHierarchyId": "1",
            "bnfChapter": "0",
            "defaultReportIdDataSel": "",
            "reportId": "124",
            "action": "checkForAvailableDownload",
        }

        rsp = session.get(url, params=params)
        request_id = rsp.json()["requestNo"]

        mkdir_p(dir_path)

        url = self.base_url + "downloadAvailableReport.zip"
        params = {"requestId": request_id}

        rsp = session.post(url, params=params, stream=True)

        total_size = int(rsp.headers["content-length"])

        progress_bar = tqdm(total=total_size, unit="B", unit_scale=True)

        with open(zip_path, "wb") as f:
            for block in rsp.iter_content(32 * 1024):
                f.write(block)
                progress_bar.update(len(block))

        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(dir_path)
Code example #15
    def handle(self, *args, **options):
        if import_in_progress():
            notify_slack("Not checking numbers: import in progress")
            return

        previous_log_path = get_previous_log_path()

        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        log_path = os.path.join(settings.CHECK_NUMBERS_BASE_PATH, timestamp)
        mkdir_p(log_path)

        numbers = {}
        options = Options()
        options.headless = True
        with webdriver.Firefox(options=options) as browser:
            browser.set_page_load_timeout(60)

            for name, path in paths_to_scrape():
                source = get_page_source(browser, path, name, log_path)
                numbers_list = extract_numbers(source)
                numbers[name] = {"path": path, "numbers": numbers_list}

        write_numbers(numbers, log_path)

        if previous_log_path is None:
            msg = "Not checking numbers: this is the first deploy since last import"
            notify_slack(msg)
            return

        previous_numbers = load_previous_numbers(previous_log_path)

        differences = compare_numbers(previous_numbers, numbers)

        if differences:
            msg = "The following pages have changed:\n\n"
            msg += "\n".join(differences)
            msg += "\n\nNext step: compare {} and {}".format(
                previous_log_path, log_path
            )
            notify_slack(msg)
Code example #16
    def handle(self, *args, **kwargs):
        base_url = "https://www.nhsbsa.nhs.uk"

        rsp = requests.get(
            base_url +
            "/prescription-data/understanding-our-data/bnf-snomed-mapping")
        doc = BeautifulSoup(rsp.text, "html.parser")

        urls = set(a["href"] for a in doc.find_all("a", href=True)
                   if a["href"].endswith(".zip"))
        if len(urls) != 1:
            raise RuntimeError(
                "Expected exactly one zipfile URL, found {}".format(len(urls)))
        href = list(urls)[0]

        filename = href.split("/")[-1]
        datestamp = filename.split(".")[0].split("%20")[-1]
        release_date = "{}_{}_{}".format(
            datestamp[:4], datestamp[4:6], datestamp[6:])

        dir_path = os.path.join(settings.PIPELINE_DATA_BASEDIR,
                                "bnf_snomed_mapping", release_date)
        zip_path = os.path.join(dir_path, filename)

        if glob.glob(os.path.join(dir_path, "*.xlsx")):
            return

        mkdir_p(dir_path)

        rsp = requests.get(base_url + href, stream=True)
        rsp.raise_for_status()

        with open(zip_path, "wb") as f:
            for block in rsp.iter_content(32 * 1024):
                f.write(block)

        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(dir_path)
Code example #17
    def handle(self, *args, **kwargs):
        path = os.path.join(settings.PIPELINE_DATA_BASEDIR, 'bnf_codes')
        year_and_month = datetime.date.today().strftime('%Y_%m')
        dir_path = os.path.join(path, year_and_month)
        mkdir_p(dir_path)
        zip_path = os.path.join(dir_path, 'download.zip')

        base_url = 'https://apps.nhsbsa.nhs.uk/infosystems/data/'

        session = requests.Session()
        session.cookies['JSESSIONID'] = kwargs['jsessionid']

        url = base_url + 'showDataSelector.do'
        params = {'reportId': '126'}
        rsp = session.get(url, params=params)

        tree = html.fromstring(rsp.content)
        options = tree.xpath('//select[@id="bnfVersion"]/option')

        year_to_bnf_version = {}
        for option in options:
            datestamp, version = option.text.split(' : ')
            date = datetime.datetime.strptime(datestamp, '%d-%m-%Y')
            year_to_bnf_version[date.year] = version

        year = max(year_to_bnf_version)
        version = year_to_bnf_version[year]

        url = base_url + 'requestSelectedDownload.do'
        params = {
            'bnfVersion': version,
            'filePath': '',
            'dataView': '260',
            'format': '',
            'defaultReportIdDataSel': '',
            'reportId': '126',
            'action': 'checkForAvailableDownload',
        }
        rsp = session.get(url, params=params)

        request_id = rsp.json()['requestNo']

        url = base_url + 'downloadAvailableReport.zip'
        params = {
            'requestId': request_id,
        }
        rsp = session.post(url, params=params, stream=True)

        total_size = int(rsp.headers['content-length'])

        progress_bar = tqdm(total=total_size, unit='B', unit_scale=True)

        with open(zip_path, 'wb') as f:
            for block in rsp.iter_content(32 * 1024):
                f.write(block)
                progress_bar.update(len(block))

        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(dir_path)

        csv_paths = glob.glob(os.path.join(dir_path, '*.csv'))

        assert len(csv_paths) == 1

        os.rename(csv_paths[0], os.path.join(dir_path, 'bnf_codes.csv'))
Code example #18
    def handle(self, *args, **kwargs):
        if kwargs["jsessionid"] is None:
            # Note that this is mostly-duplicated above, but I can't see a nice
            # way of avoiding this.
            print("""
The files are on a site that requires you to log in.  To download the files,
you will need to visit the site in your browser and log in.  This will set a
cookie in your browser which you will need to pass to this command.

Specifically, you should:

    * Visit
        https://isd.digital.nhs.uk/trud3/user/authenticated/group/0/pack/6/subpack/24/releases
      in your browser
    * Sign up or log in
    * Copy the value of the JSESSIONID cookie
      * In Chrome, this can be found in the Application tab of Developer Tools
    * Paste this value below:
            """.strip())

            jsessionid = input()
        else:
            jsessionid = kwargs["jsessionid"]

        year = kwargs["year"]
        month = kwargs["month"]

        year_and_month = datetime.date(year, month, 1).strftime("%Y_%m")
        dir_path = os.path.join(settings.PIPELINE_DATA_BASEDIR, "dmd",
                                year_and_month)
        zip_path = os.path.join(dir_path, "download.zip")

        if os.path.exists(dir_path):
            print("Data already downloaded for", year_and_month)
            return

        mkdir_p(dir_path)

        session = requests.Session()
        session.cookies["JSESSIONID"] = jsessionid

        base_url = "https://isd.digital.nhs.uk/"

        rsp = session.get(
            base_url +
            "trud3/user/authenticated/group/0/pack/6/subpack/24/releases")

        tree = html.fromstring(rsp.content)

        divs = tree.find_class("release subscribed")

        div_dates = [extract_date(div) for div in divs]
        assert div_dates == sorted(div_dates, reverse=True)

        divs_for_month = []
        for div in divs:
            date = extract_date(div)
            if date.year == year and date.month == month:
                divs_for_month.append(div)

        if not divs_for_month:
            raise CommandError

        div = divs_for_month[-1]
        href = div.find_class("download-release")[0].attrib["href"]

        rsp = session.get(base_url + href, stream=True)

        with open(zip_path, "wb") as f:
            for block in rsp.iter_content(32 * 1024):
                f.write(block)
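
Each handle() shown above belongs to a Django management command, so in practice it is run via manage.py or programmatically with call_command. A usage sketch; the command name below is a placeholder for illustration, not taken from the source:

from django.core.management import call_command

# Roughly equivalent to: python manage.py fetch_some_data 2019 7 --verbosity 2
# "fetch_some_data" is a hypothetical command name used only for illustration.
call_command("fetch_some_data", 2019, 7, verbosity=2)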