コード例 #1
0
ファイル: pipeline.py プロジェクト: thecodemasterk/openfda
  def run(self):
    '''Downloads and unzips the NSDE archive, then converts its CSV file to
    newline-delimited JSON (one record per line) at self.output().path.
    '''
    logging.basicConfig(level=logging.INFO)

    zip_filename = config.data_dir('nsde/raw/nsde.zip')
    output_dir = config.data_dir('nsde/raw')
    os.system('mkdir -p %s' % output_dir)
    common.download(NSDE_DOWNLOAD, zip_filename)
    # NOTE: the format string below reads zip_filename and output_dir out of
    # locals(), so those two local names must not be renamed.
    os.system('unzip -o %(zip_filename)s -d %(output_dir)s' % locals())

    csv_file = join(output_dir, self.csv_file_name)
    logging.info("Reading csv file: %s", csv_file)
    output_path = self.output().path
    os.system('mkdir -p %s' % dirname(output_path))
    # 'utf-8-sig' strips a leading byte-order mark if the CSV carries one.
    df = pd.read_csv(csv_file, encoding='utf-8-sig')
    # Rows are written one JSON document per line. A whole-frame
    # df.to_json(output_path, ...) dump before this point would only be
    # truncated by the "w" open below, so none is performed.
    with open(output_path, "w") as f:
      for _, row in df.iterrows():
        row.to_json(f)
        f.write("\n")
コード例 #2
0
import simplejson as json
import sys
import time

import arrow
import elasticsearch
import luigi

from openfda import config, common, elasticsearch_requests, index_util, parallel
from openfda.annotation_table.pipeline import CombineHarmonization
from openfda.tasks import AlwaysRunTask
from openfda.spl import annotate
from openfda.parallel import IdentityReducer

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
META_DIR = config.data_dir('spl/meta')
# Ensure meta directory is available for task tracking
common.shell_cmd('mkdir -p %s', META_DIR)

SPL_JS = join(RUN_DIR, 'spl/spl_to_json.js')
LOINC = join(RUN_DIR, 'spl/data/sections.csv')

SPL_S3_BUCKET = 's3://openfda-data-spl/data/'
SPL_S3_LOCAL_DIR = config.data_dir('spl/s3_sync')
SPL_S3_CHANGE_LOG = join(SPL_S3_LOCAL_DIR, 'change_log/SPLDocuments.csv')
SPL_BATCH_DIR = join(META_DIR, 'batch')
SPL_PROCESS_DIR = config.data_dir('spl/batches')

common.shell_cmd('mkdir -p %s', SPL_S3_LOCAL_DIR)
common.shell_cmd('mkdir -p %s', SPL_PROCESS_DIR)
コード例 #3
0
ファイル: pipeline.py プロジェクト: FDA/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('device_pma/extracted').'''
   extracted_dir = config.data_dir('device_pma/extracted')
   return luigi.LocalTarget(extracted_dir)
コード例 #4
0
ファイル: pipeline.py プロジェクト: zheismysavior/openfda
Pipeline for converting CSV nsde data to JSON and importing into Elasticsearch.
'''

import glob
import os
from os.path import join, dirname

import luigi

from openfda import common, config, parallel, index_util
from openfda.common import newest_file_timestamp

NSDE_DOWNLOAD = \
  'https://download.open.fda.gov/Comprehensive_NDC_SPL_Data_Elements_File.zip'
NSDE_EXTRACT_DB = 'nsde/nsde.db'
NSDE_RAW_DIR = config.data_dir('nsde/raw')


class DownloadNSDE(luigi.Task):
    '''Downloads the NSDE zip archive and stores its CSV as nsde.csv in
    NSDE_RAW_DIR.
    '''

    def output(self):
        '''Returns the luigi.LocalTarget for nsde.csv under NSDE_RAW_DIR.'''
        return luigi.LocalTarget(join(NSDE_RAW_DIR, 'nsde.csv'))

    def run(self):
        '''Downloads NSDE_DOWNLOAD, unzips it next to the target and renames
        the extracted CSV to the target path.

        Raises:
          RuntimeError: if unzip exits with an error status, or if no CSV is
            available after extraction.
        '''
        # Imported locally so this block does not rely on a module-level
        # import of subprocess.
        import subprocess

        target_path = self.output().path
        output_dir = dirname(target_path)
        zip_filename = join(output_dir, 'nsde.zip')
        common.download(NSDE_DOWNLOAD, zip_filename)

        # Argument-list form: no shell is involved, so paths containing spaces
        # or shell metacharacters are passed through intact.
        status = subprocess.call(
            ['unzip', '-o', zip_filename, '-d', output_dir])
        # NOTE(review): Info-ZIP unzip documents exit status 1 as "warnings,
        # processing completed" -- tolerated here; confirm for the unzip in use.
        if status not in (0, 1):
            raise RuntimeError('unzip of %s failed with exit status %s'
                               % (zip_filename, status))

        # Sort for a deterministic pick and ignore a pre-existing target so a
        # stale nsde.csv is never "renamed" onto itself.
        extracted = sorted(
            path for path in glob.glob(join(output_dir, '*.csv'))
            if path != target_path)
        if extracted:
            os.rename(extracted[0], target_path)
        elif not os.path.exists(target_path):
            raise RuntimeError('No CSV file found in %s after extracting %s'
                               % (output_dir, zip_filename))


class NSDE2JSONMapper(parallel.Mapper):
コード例 #5
0
ファイル: pipeline.py プロジェクト: zheismysavior/openfda
 def output(self):
     '''Returns a luigi.LocalTarget for config.data_dir('device_pma/extracted').'''
     extracted_dir = config.data_dir('device_pma/extracted')
     return luigi.LocalTarget(extracted_dir)
コード例 #6
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('classification/extracted').'''
   extracted_dir = config.data_dir('classification/extracted')
   return luigi.LocalTarget(extracted_dir)
コード例 #7
0
ファイル: pipeline.py プロジェクト: thecodemasterk/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('nsde/raw/nsde_raw.json').'''
   raw_json_path = config.data_dir('nsde/raw/nsde_raw.json')
   return luigi.LocalTarget(raw_json_path)
コード例 #8
0
 def output(self):
   '''Returns a luigi.LocalTarget for the BATCH entry under
   config.data_dir('device_udi/annotate.db').'''
   annotate_db = config.data_dir('device_udi/annotate.db')
   batch_path = os.path.join(annotate_db, BATCH)
   return luigi.LocalTarget(batch_path)
コード例 #9
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for
   config.data_dir('registration/registration_listing.db').'''
   listing_db = config.data_dir('registration/registration_listing.db')
   return luigi.LocalTarget(listing_db)
コード例 #10
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for
   config.data_dir('registration/owner_operator.db').'''
   owner_operator_db = config.data_dir('registration/owner_operator.db')
   return luigi.LocalTarget(owner_operator_db)
コード例 #11
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('registration/extracted').'''
   extracted_dir = config.data_dir('registration/extracted')
   return luigi.LocalTarget(extracted_dir)
コード例 #12
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
from bs4 import BeautifulSoup
import elasticsearch
import luigi
import pandas
import requests
import simplejson as json
import urllib2

from openfda import common, config, elasticsearch_requests, index_util, parallel
from openfda import download_util
from openfda.tasks import AlwaysRunTask
from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
  DeviceAnnotateMapper)

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
BASE_DIR = config.data_dir('registration')
common.shell_cmd('mkdir -p %s', BASE_DIR)
# A directory for holding files that track Task state
META_DIR = config.data_dir('registration/meta')
common.shell_cmd('mkdir -p %s', META_DIR)

DEVICE_REG_PAGE = ('http://www.fda.gov/MedicalDevices/'
                   'DeviceRegulationandGuidance/HowtoMarketYourDevice/'
                   'RegistrationandListing/ucm134495.htm')

S3_BUCKET = 's3://openfda-data-reglist/'
S3_LOCAL_DIR = config.data_dir('registration/s3_sync')

common.shell_cmd('mkdir -p %s', S3_LOCAL_DIR)

REMAPPED_FILES = {
コード例 #13
0
ファイル: pipeline.py プロジェクト: FDA/openfda
import sys
import time

import arrow
import elasticsearch
import luigi

from openfda import config, common, elasticsearch_requests, index_util, parallel
from openfda.annotation_table.pipeline import CombineHarmonization
from openfda.tasks import AlwaysRunTask
from openfda.spl import annotate
from openfda.parallel import IdentityReducer


RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
META_DIR = config.data_dir('spl/meta')
# Ensure meta directory is available for task tracking
common.shell_cmd('mkdir -p %s', META_DIR)

SPL_JS = join(RUN_DIR, 'spl/spl_to_json.js')
LOINC = join(RUN_DIR, 'spl/data/sections.csv')

SPL_S3_BUCKET = 's3://openfda-data-spl/data/'
SPL_S3_LOCAL_DIR = config.data_dir('spl/s3_sync')
SPL_S3_CHANGE_LOG = join(SPL_S3_LOCAL_DIR, 'change_log/SPLDocuments.csv')
SPL_BATCH_DIR = join(META_DIR, 'batch')
SPL_PROCESS_DIR = config.data_dir('spl/batches')

common.shell_cmd('mkdir -p %s', SPL_S3_LOCAL_DIR)
common.shell_cmd('mkdir -p %s', SPL_PROCESS_DIR)
コード例 #14
0
ファイル: pipeline.py プロジェクト: FDA/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for the BATCH entry under
   config.data_dir('device_udi/extracted').'''
   extracted_root = config.data_dir('device_udi/extracted')
   batch_path = os.path.join(extracted_root, BATCH)
   return luigi.LocalTarget(batch_path)
コード例 #15
0
ファイル: pipeline.py プロジェクト: LiuFang816/SALSTM_py_data
 def output(self):
     '''Returns a luigi.LocalTarget for config.data_dir('510k/raw').'''
     raw_dir = config.data_dir('510k/raw')
     return luigi.LocalTarget(raw_dir)
コード例 #16
0
ファイル: pipeline.py プロジェクト: LiuFang816/SALSTM_py_data
 def output(self):
     '''Returns a luigi.LocalTarget for config.data_dir('510k', 'json.db').'''
     json_db = config.data_dir('510k', 'json.db')
     return luigi.LocalTarget(json_db)
コード例 #17
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('registration/annotate.db').'''
   annotate_db = config.data_dir('registration/annotate.db')
   return luigi.LocalTarget(annotate_db)
コード例 #18
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
from os.path import basename, dirname, join
import re
import subprocess
import urllib2
import urlparse

import arrow
import simplejson as json

from openfda.tasks import DependencyTriggeredTask
from openfda import common, config, parallel, spl
from openfda.annotation_table import unii_harmonization
from openfda.spl import process_barcodes, extract

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
data_dir = config.data_dir('harmonization')
BATCH_DATE = arrow.utcnow().ceil('week').format('YYYYMMDD')
BASE_DIR = config.data_dir('harmonization/batches/%s' % BATCH_DATE)
SPL_S3_DIR = config.data_dir('spl/s3_sync')
TMP_DIR = config.tmp_dir()

common.shell_cmd('mkdir -p %s', data_dir)
common.shell_cmd('mkdir -p %s', BASE_DIR)
common.shell_cmd('mkdir -p %s', TMP_DIR)

SPL_SET_ID_INDEX = join(BASE_DIR, 'spl_index.db')
DAILYMED_PREFIX = 'ftp://public.nlm.nih.gov/nlmdata/.dailymed/'

PHARM_CLASS_DOWNLOAD = \
  DAILYMED_PREFIX + 'pharmacologic_class_indexing_spl_files.zip'
コード例 #19
0
ファイル: pipeline.py プロジェクト: amlydu/openfda
import sys
import time

import arrow
import elasticsearch
import luigi

from openfda import config, common, elasticsearch_requests, index_util, parallel
from openfda.annotation_table.pipeline import CombineHarmonization
from openfda.tasks import AlwaysRunTask
from openfda.spl import annotate
from openfda.parallel import IdentityReducer


RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
META_DIR = config.data_dir("spl/meta")
# Ensure meta directory is available for task tracking
common.shell_cmd("mkdir -p %s", META_DIR)

SPL_JS = join(RUN_DIR, "spl/spl_to_json.js")
LOINC = join(RUN_DIR, "spl/data/sections.csv")

SPL_S3_BUCKET = "s3://openfda-data-spl/data/"
SPL_S3_LOCAL_DIR = config.data_dir("spl/s3_sync")
SPL_S3_CHANGE_LOG = join(SPL_S3_LOCAL_DIR, "change_log/SPLDocuments.csv")
SPL_BATCH_DIR = join(META_DIR, "batch")
SPL_PROCESS_DIR = config.data_dir("spl/batches")

common.shell_cmd("mkdir -p %s", SPL_S3_LOCAL_DIR)
common.shell_cmd("mkdir -p %s", SPL_PROCESS_DIR)
コード例 #20
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
import arrow
import elasticsearch
import luigi
import requests
import simplejson as json

from openfda import common, config, index_util, parallel
from openfda import device_common, download_util
from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
  DeviceAnnotateMapper)
from openfda.tasks import AlwaysRunTask

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
# A directory for holding files that track Task state
META_DIR = config.data_dir('classification/meta')
common.shell_cmd('mkdir -p %s', META_DIR)


DEVICE_CLASS_ZIP = ('http://www.accessdata.fda.gov/premarket/'
                    'ftparea/foiclass.zip')


class DownloadFoiClass(luigi.Task):
  def requires(self):
    '''Declares no upstream tasks: always returns an empty list.'''
    return list()

  def output(self):
    '''Returns a luigi.LocalTarget for config.data_dir('classification/raw').'''
    raw_dir = config.data_dir('classification/raw')
    return luigi.LocalTarget(raw_dir)

  def run(self):
コード例 #21
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for the res/batches directory named after
   self.batch formatted as YYYYMMDD.'''
   batch_stamp = self.batch.strftime('%Y%m%d')
   batch_dir = config.data_dir('res/batches/%s' % batch_stamp)
   return luigi.LocalTarget(batch_dir)
コード例 #22
0
 def output(self):
     '''Returns a luigi.LocalTarget for enforcement.csv inside the res/batches
     directory named after self.batch formatted as YYYYMMDD.'''
     batch_stamp = self.batch.strftime('%Y%m%d')
     csv_path = config.data_dir(
         'res/batches/%s/enforcement.csv' % batch_stamp)
     return luigi.LocalTarget(csv_path)
コード例 #23
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
from os.path import dirname, join
import sys

import arrow
import elasticsearch
import luigi
import requests
import simplejson as json

from openfda import common, config, elasticsearch_requests, index_util, parallel
from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
  DeviceAnnotateMapper)
from openfda.tasks import AlwaysRunTask

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
META_DIR = config.data_dir('device_recall/meta')
common.shell_cmd('mkdir -p %s', META_DIR)

DEVICE_RECALL_BUCKET = 's3://openfda-device-recalls/'
DEVICE_RECALL_LOCAL_DIR = config.data_dir('device_recall/s3_sync')

class SyncS3DeviceRecall(AlwaysRunTask):
  bucket = DEVICE_RECALL_BUCKET
  local_dir = DEVICE_RECALL_LOCAL_DIR

  def _run(self):
    common.cmd(['mkdir', '-p', self.local_dir])
    common.cmd(['aws',
                '--profile=' + config.aws_profile(),
                's3',
                'sync',
コード例 #24
0
'''

import collections
import logging
import os
from os.path import basename, dirname, join
import sys

import luigi
import simplejson as json

from openfda import common, config, parallel
from openfda.parallel import mapreduce, Collection, Mapper, PivotReducer

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
DATA_DIR = config.data_dir('device_harmonization')
common.shell_cmd('mkdir -p %s', DATA_DIR)

PLUCK_MAP = {
  'device_pma': [
    'applicant',
    'trade_name',
    'generic_name',
    'decision_date',
    'advisory_committee',
    'pma_number',
    'product_code',
    'advisory_committee',
  ],
  '510k': [
    'applicant',
コード例 #25
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('device_recall/annotate.db').'''
   annotate_db = config.data_dir('device_recall/annotate.db')
   return luigi.LocalTarget(annotate_db)
コード例 #26
0
import glob
import logging
import os
import re
from urllib.parse import urljoin
from urllib.request import urlopen
from os.path import join, dirname

import luigi
from bs4 import BeautifulSoup

from openfda import common, config, index_util, parallel
from openfda.common import convert_unicode, newest_file_timestamp

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
BASE_DIR = config.data_dir('caers')
DOWNLOAD_DIR = config.data_dir('caers/raw')
common.shell_cmd('mkdir -p %s', BASE_DIR)

CAERS_DOWNLOAD_PAGE_URL = 'https://www.fda.gov/food/compliance-enforcement-food/cfsan-adverse-event-reporting-system-caers'

RENAME_MAP = {
  'report id': 'report_number',
  'caers created date': 'date_created',
  'date of event': 'date_started',
  'product type': 'role',
  'product': 'name_brand',
  'product code': 'industry_code',
  'description': 'industry_name',
  'patient age': 'age',
  'age units': 'age_unit',
コード例 #27
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
import arrow
import elasticsearch
import luigi
import requests
import simplejson as json

from openfda import common, config, elasticsearch_requests, index_util, parallel
from openfda import download_util
from openfda.device_clearance import transform
from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
  DeviceAnnotateMapper)
from openfda.tasks import AlwaysRunTask

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
# A directory for holding files that track Task state
META_DIR = config.data_dir('510k/meta')
common.shell_cmd('mkdir -p %s', META_DIR)


CLEARED_DEVICE_URL = 'http://www.accessdata.fda.gov/premarket/ftparea/'
CLEARED_DEV_ZIPS = [CLEARED_DEVICE_URL + 'pmn96cur.zip',
  CLEARED_DEVICE_URL + 'pmn9195.zip',
  CLEARED_DEVICE_URL + 'pmn8690.zip',
  CLEARED_DEVICE_URL + 'pmn8185.zip',
  CLEARED_DEVICE_URL + 'pmn7680.zip']


class Download_510K(luigi.Task):
  def requires(self):
    '''Declares no upstream tasks: always returns an empty list.'''
    return list()
コード例 #28
0
ファイル: pipeline.py プロジェクト: FDA/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for the BATCH entry under
   config.data_dir('device_udi/annotate.db').'''
   annotate_db = config.data_dir('device_udi/annotate.db')
   batch_path = os.path.join(annotate_db, BATCH)
   return luigi.LocalTarget(batch_path)
コード例 #29
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('510k/raw').'''
   raw_dir = config.data_dir('510k/raw')
   return luigi.LocalTarget(raw_dir)
コード例 #30
0
ファイル: pipeline.py プロジェクト: LiuFang816/SALSTM_py_data
import arrow
import elasticsearch
import luigi
import requests
import simplejson as json

from openfda import common, config, elasticsearch_requests, index_util, parallel
from openfda import download_util
from openfda.device_clearance import transform
from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
                                                   DeviceAnnotateMapper)
from openfda.tasks import AlwaysRunTask

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
# A directory for holding files that track Task state
META_DIR = config.data_dir('510k/meta')
common.shell_cmd('mkdir -p %s', META_DIR)

CLEARED_DEVICE_URL = 'http://www.accessdata.fda.gov/premarket/ftparea/'
CLEARED_DEV_ZIPS = [
    CLEARED_DEVICE_URL + 'pmn96cur.zip', CLEARED_DEVICE_URL + 'pmn9195.zip',
    CLEARED_DEVICE_URL + 'pmn8690.zip', CLEARED_DEVICE_URL + 'pmn8185.zip',
    CLEARED_DEVICE_URL + 'pmn7680.zip'
]


class Download_510K(luigi.Task):
    def requires(self):
        '''Declares no upstream tasks: always returns an empty list.'''
        return list()

    def output(self):
コード例 #31
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('510k/extracted').'''
   extracted_dir = config.data_dir('510k/extracted')
   return luigi.LocalTarget(extracted_dir)
コード例 #32
0
ファイル: pipeline.py プロジェクト: LiuFang816/SALSTM_py_data
 def output(self):
     '''Returns a luigi.LocalTarget for config.data_dir('510k/extracted').'''
     extracted_dir = config.data_dir('510k/extracted')
     return luigi.LocalTarget(extracted_dir)
コード例 #33
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('510k', 'json.db').'''
   json_db = config.data_dir('510k', 'json.db')
   return luigi.LocalTarget(json_db)
コード例 #34
0
import glob
import logging
import os
import sys
import traceback

import arrow
import luigi
import xmltodict
from openfda import common, config, index_util, parallel
from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
                                                   DeviceAnnotateMapper)

DEVICE_UDI_BUCKET = 's3://cdrh-data'
DEVICE_UDI_LOCAL_DIR = config.data_dir('device_udi/s3_sync')
BATCH = arrow.utcnow().floor('day').format('YYYYMMDD')
AWS_CLI = 'aws'


class SyncS3DeviceUDI(luigi.Task):
  bucket = DEVICE_UDI_BUCKET
  local_dir = DEVICE_UDI_LOCAL_DIR
  aws = AWS_CLI

  def flag_file(self):
    '''Returns the path of the '.last_sync_time' file inside self.local_dir.'''
    marker_name = '.last_sync_time'
    return os.path.join(self.local_dir, marker_name)

  def complete(self):
    # Only run S3 sync once per day.
    if config.disable_downloads():
コード例 #35
0
ファイル: pipeline.py プロジェクト: FDA/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('nsde/raw/nsde_raw.json').'''
   raw_json_path = config.data_dir('nsde/raw/nsde_raw.json')
   return luigi.LocalTarget(raw_json_path)
コード例 #36
0
 def output(self):
   '''Returns a luigi.LocalTarget for the BATCH entry under
   config.data_dir('device_udi/extracted').'''
   extracted_root = config.data_dir('device_udi/extracted')
   batch_path = os.path.join(extracted_root, BATCH)
   return luigi.LocalTarget(batch_path)
コード例 #37
0
ファイル: pipeline.py プロジェクト: FDA/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir(NSDE_EXTRACT_DB).'''
   extract_db = config.data_dir(NSDE_EXTRACT_DB)
   return luigi.LocalTarget(extract_db)
コード例 #38
0
import json
import os
import re
from os.path import join, dirname

import arrow
import luigi
import pandas as pd

from openfda import common, config, index_util, parallel
from openfda.tasks import AlwaysRunTask

RUN_DIR = dirname(os.path.abspath(__file__))

S3_ACCESS_LOGS_BUCKET = 's3://openfda-logs/download/'
S3_ACCESS_LOGS_DIR = config.data_dir('downloadstats/s3_logs_raw')
S3_STATS_DB_DIR = config.data_dir('downloadstats/s3_stats.db')
S3_ACCESS_LOGS_CUTOFF = arrow.get('2017-03-01')

CF_ACCESS_LOGS_BUCKET = 's3://openfda-splash-logs/download-cf-logs/'
CF_ACCESS_LOGS_DIR = config.data_dir('downloadstats/cf_logs_raw')
CF_STATS_DB_DIR = config.data_dir('downloadstats/cf_stats.db')

TOTAL_STATS_DB_DIR = config.data_dir('downloadstats/total_stats.db')

ENDPOINT_INDEX_MAP = {
  'animalandveterinary/event': 'animalandveterinarydrugevent',
  'drug/event': 'drugevent',
  'drug/label': 'druglabel',
  'drug/enforcement': 'drugenforcement',
  'drug/ndc': 'ndc',
コード例 #39
0
import arrow
import elasticsearch
import luigi
import requests
import simplejson as json

from openfda import common, config, index_util, parallel
from openfda import device_common, download_util
from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
                                                   DeviceAnnotateMapper)
from openfda.tasks import AlwaysRunTask

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
# A directory for holding files that track Task state
META_DIR = config.data_dir('classification/meta')
common.shell_cmd('mkdir -p %s', META_DIR)

DEVICE_CLASS_ZIP = ('https://www.accessdata.fda.gov/premarket/'
                    'ftparea/foiclass.zip')


class DownloadFoiClass(luigi.Task):
    def requires(self):
        '''Declares no upstream tasks: always returns an empty list.'''
        return list()

    def output(self):
        '''Returns a luigi.LocalTarget for config.data_dir('classification/raw').'''
        raw_dir = config.data_dir('classification/raw')
        return luigi.LocalTarget(raw_dir)

    def run(self):
        output_filename = join(self.output().path,
コード例 #40
0
 def output(self):
     '''Returns a luigi.LocalTarget for
     config.data_dir('classification/annotate.db').'''
     annotate_db = config.data_dir('classification/annotate.db')
     return luigi.LocalTarget(annotate_db)
コード例 #41
0
 def output(self):
     '''Returns a luigi.LocalTarget for
     config.data_dir('classification/extracted').'''
     extracted_dir = config.data_dir('classification/extracted')
     return luigi.LocalTarget(extracted_dir)
コード例 #42
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for
   config.data_dir('classification/annotate.db').'''
   annotate_db = config.data_dir('classification/annotate.db')
   return luigi.LocalTarget(annotate_db)
コード例 #43
0
ファイル: pipeline.py プロジェクト: pressleydavid/openfda
import time
import urllib2

import arrow
import elasticsearch
import luigi

from openfda import parallel, config, index_util, elasticsearch_requests
from openfda.annotation_table.pipeline import CombineHarmonization
from openfda.faers import annotate
from openfda.faers import xml_to_json
from openfda.tasks import AlwaysRunTask, DependencyTriggeredTask

# this should be a symlink to wherever the real data directory is
RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
BASE_DIR = config.data_dir()
FAERS_HISTORIC = (
    'http://www.fda.gov/Drugs/GuidanceCompliance'
    'RegulatoryInformation/Surveillance/AdverseDrugEffects/ucm083765.htm')
FAERS_CURRENT = (
    'http://www.fda.gov/Drugs/GuidanceCompliance'
    'RegulatoryInformation/Surveillance/AdverseDrugEffects/ucm082193.htm')

MAX_RECORDS_PER_FILE = luigi.IntParameter(-1, is_global=True)


class DownloadDataset(AlwaysRunTask):
    '''
  This task downloads all datasets that have not yet been fetched.
  '''
    def _fetch(self):
コード例 #44
0
ファイル: pipeline.py プロジェクト: zheismysavior/openfda
''' Device pipeline for downloading, transforming to JSON and loading COVID-19 Serological Testing Validation Project
 data into Elasticsearch.
'''

import glob
from os.path import dirname

import luigi
import arrow

from openfda import common, config, index_util, parallel
from openfda.tasks import AlwaysRunTask
from openfda.common import first_file_timestamp

SEROLOGY_TEST_BUCKET = 's3://openfda-covid19serology/'
SEROLOGY_TEST_SYNC_DIR = config.data_dir('covid19serology/s3_sync')
SEROLOGY_TEST_JSON_DB_DIR = config.data_dir('covid19serology/json.db')


class SyncS3SerologyTest(AlwaysRunTask):

  def _run(self):
    '''Creates SEROLOGY_TEST_SYNC_DIR, then runs `aws s3 sync` from
    SEROLOGY_TEST_BUCKET into it, both through common.cmd.'''
    make_dir_cmd = ['mkdir', '-p', SEROLOGY_TEST_SYNC_DIR]
    common.cmd(make_dir_cmd)

    profile_flag = '--profile=' + config.aws_profile()
    sync_cmd = ['aws', profile_flag, 's3', 'sync',
                SEROLOGY_TEST_BUCKET, SEROLOGY_TEST_SYNC_DIR]
    common.cmd(sync_cmd)

  def output(self):
コード例 #45
0
ファイル: pipeline.py プロジェクト: raz101zor/openfda
import csv
import re
import logging
import os
from os.path import dirname, join

import arrow
import datetime
import luigi

from openfda import common, config, index_util, parallel

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
BASE_DIR = config.data_dir('caers')
common.shell_cmd('mkdir -p %s', BASE_DIR)

S3_BUCKET = 's3://openfda-data-caers/'
S3_LOCAL_DIR = config.data_dir('caers/s3_sync')
# TODO(hansnelsen): initiate and resolve naming convention for this file and
#                   s3 bucket. Currently, the file is downloaded from
#                   s3://openfda-lonnie/caers/ (the naming of this file is
#                   not consistent). The pipeline engineer downloads it, renames
#                   it and then uploads it manually to the above bucket.
CAERS_FILE = 'caers.csv'
# The first argument of logging.info is the format string; the directory is
# passed as its lazy argument. Passing the path itself as the format string
# with a stray extra argument makes the record fail to format when emitted.
logging.info('%s dir', S3_LOCAL_DIR)
common.shell_cmd('mkdir -p %s', S3_LOCAL_DIR)

RENAME_MAP = {
  'Report #': 'report_number',
  'Created Date': 'date_created',
  'Event Start Date': 'date_started',
コード例 #46
0
ファイル: pipeline.py プロジェクト: zheismysavior/openfda
import glob
import os
from os.path import dirname, join

import luigi

from openfda import common, config, download_util, index_util, parallel
from openfda.common import first_file_timestamp
from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
                                                   DeviceAnnotateMapper)
from openfda.device_pma import transform

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
# A directory for holding files that track Task state
META_DIR = config.data_dir('device_pma/meta')
RAW_DIR = config.data_dir('device_pma/raw')
common.shell_cmd('mkdir -p %s', META_DIR)

DEVICE_PMA_ZIP = 'https://www.accessdata.fda.gov/premarket/ftparea/pma.zip'


class DownloadPMA(luigi.Task):
    def requires(self):
        '''Declares no upstream tasks: always returns an empty list.'''
        return list()

    def output(self):
        '''Returns a luigi.LocalTarget for RAW_DIR.'''
        raw_target = luigi.LocalTarget(RAW_DIR)
        return raw_target

    def run(self):
        output_filename = join(self.output().path,
コード例 #47
0
import os
import re
import sys
import traceback

import arrow
import luigi
from lxml import etree

from openfda import common, config, index_util, parallel
from openfda.adae import annotate
from openfda.annotation_table.pipeline import CombineHarmonization
from openfda.common import newest_file_timestamp

ADAE_BUCKET = 's3://openfda-data-adae'
ADAE_LOCAL_DIR = config.data_dir('adae/s3_sync')
BATCH = arrow.utcnow().floor('day').format('YYYYMMDD')
AWS_CLI = 'aws'

# TODO: move to an external file once the list grows unmanageable.
NULLIFIED = [
    'US-FDACVM-2018-US-045311.xml', 'US-FDACVM-2018-US-048571.xml',
    'US-FDACVM-2018-US-046672.xml', 'US-FDACVM-2017-US-042492.xml',
    'US-FDACVM-2018-US-044065.xml', 'US-FDACVM-2017-US-070108.xml',
    'US-FDACVM-2017-US-002864.xml', 'US-FDACVM-2017-US-002866.xml',
    'US-FDACVM-2017-US-052458.xml', 'US-FDACVM-2017-US-055193.xml',
    'US-FDACVM-2017-US-043931.xml', 'US-FDACVM-2018-US-002321.xml',
    'US-FDACVM-2018-US-063536.xml', 'US-FDACVM-2015-US-221044.xml',
    'US-FDACVM2019-US-016263.xml', 'US-FDACVM-2016-US-062923.xml',
    'US-FDACVM-2017-US-001483.xml', 'US-FDACVM-2017-US-009155.xml',
    'US-FDACVM-2017-US-028125.xml', 'US-FDACVM-2017-US-033030.xml',
コード例 #48
0
ファイル: pipeline.py プロジェクト: zheismysavior/openfda
 def output(self):
     '''Returns a luigi.LocalTarget for config.data_dir('device_pma/json.db').'''
     json_db = config.data_dir('device_pma/json.db')
     return luigi.LocalTarget(json_db)
コード例 #49
0
 def output(self):
     '''Returns a luigi.LocalTarget for the BATCH entry under
     config.data_dir('adae/json.db').'''
     json_db = config.data_dir('adae/json.db')
     batch_path = os.path.join(json_db, BATCH)
     return luigi.LocalTarget(batch_path)
コード例 #50
0
ファイル: pipeline.py プロジェクト: serayamaouche/openfda
 def output(self):
     '''Returns a luigi.LocalTarget for the res/batches directory named after
     self.batch formatted as YYYYMMDD.'''
     batch_stamp = self.batch.strftime('%Y%m%d')
     batch_dir = config.data_dir('res/batches/%s' % batch_stamp)
     return luigi.LocalTarget(batch_dir)
コード例 #51
0
ファイル: pipeline.py プロジェクト: raz101zor/openfda
from os.path import dirname, join
import sys

import arrow
import elasticsearch
import luigi
import requests
import simplejson as json

from openfda import common, config, elasticsearch_requests, index_util, parallel
from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
                                                   DeviceAnnotateMapper)
from openfda.tasks import AlwaysRunTask

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
META_DIR = config.data_dir('device_recall/meta')
common.shell_cmd('mkdir -p %s', META_DIR)

DEVICE_RECALL_BUCKET = 's3://openfda-device-recalls/'
DEVICE_RECALL_LOCAL_DIR = config.data_dir('device_recall/s3_sync')


class SyncS3DeviceRecall(AlwaysRunTask):
    bucket = DEVICE_RECALL_BUCKET
    local_dir = DEVICE_RECALL_LOCAL_DIR

    def _run(self):
        common.cmd(['mkdir', '-p', self.local_dir])
        common.cmd([
            'aws', '--profile=' + config.aws_profile(), 's3', 'sync',
            self.bucket, self.local_dir
コード例 #52
0
ファイル: pipeline.py プロジェクト: zheismysavior/openfda
 def output(self):
     '''Returns a luigi.LocalTarget for config.data_dir(NSDE_EXTRACT_DB).'''
     extract_db = config.data_dir(NSDE_EXTRACT_DB)
     return luigi.LocalTarget(extract_db)
コード例 #53
0
ファイル: pipeline.py プロジェクト: raz101zor/openfda
 def output(self):
     '''Returns a luigi.LocalTarget for
     config.data_dir('device_recall/annotate.db').'''
     annotate_db = config.data_dir('device_recall/annotate.db')
     return luigi.LocalTarget(annotate_db)
コード例 #54
0
ファイル: pipeline.py プロジェクト: FDA/openfda
import arrow
import elasticsearch
import luigi
import requests
import simplejson as json

from openfda import common, config, download_util, elasticsearch_requests, index_util, parallel

from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
  DeviceAnnotateMapper)
from openfda.device_pma import transform
from openfda.tasks import AlwaysRunTask

RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
# A directory for holding files that track Task state
META_DIR = config.data_dir('device_pma/meta')
common.shell_cmd('mkdir -p %s', META_DIR)

DEVICE_PMA_ZIP = 'https://www.accessdata.fda.gov/premarket/ftparea/pma.zip'

class DownloadPMA(luigi.Task):
  '''Fetches the archive at DEVICE_PMA_ZIP into the device_pma/raw data
  directory.
  '''

  def requires(self):
    '''Declares no upstream tasks: always returns an empty list.'''
    return list()

  def output(self):
    '''Returns a luigi.LocalTarget for config.data_dir('device_pma/raw').'''
    raw_dir = config.data_dir('device_pma/raw')
    return luigi.LocalTarget(raw_dir)

  def run(self):
    '''Downloads DEVICE_PMA_ZIP into the output path, keeping the last
    path segment of the URL as the file name.'''
    destination_dir = self.output().path
    zip_name = DEVICE_PMA_ZIP.rsplit('/', 1)[-1]
    zip_path = join(destination_dir, zip_name)
    common.download(DEVICE_PMA_ZIP, zip_path)
コード例 #55
0
ファイル: pipeline.py プロジェクト: ColMac/openfda
import urllib2

import arrow
import elasticsearch
import luigi

from openfda import parallel, config, index_util, elasticsearch_requests
from openfda.annotation_table.pipeline import CombineHarmonization
from openfda.faers import annotate
from openfda.faers import xml_to_json
from openfda.tasks import AlwaysRunTask, DependencyTriggeredTask


# this should be a symlink to wherever the real data directory is
RUN_DIR = dirname(dirname(os.path.abspath(__file__)))
BASE_DIR = config.data_dir()
FAERS_HISTORIC = ('http://www.fda.gov/Drugs/GuidanceCompliance'
  'RegulatoryInformation/Surveillance/AdverseDrugEffects/ucm083765.htm')
FAERS_CURRENT = ('http://www.fda.gov/Drugs/GuidanceCompliance'
  'RegulatoryInformation/Surveillance/AdverseDrugEffects/ucm082193.htm')

MAX_RECORDS_PER_FILE = luigi.IntParameter(-1, is_global=True)

class DownloadDataset(AlwaysRunTask):
  '''
  This task downloads all datasets that have not yet been fetched.
  '''
  def _fetch(self):
    for page in [self._faers_current.find_all(href=re.compile('.*.zip')),
                 self._faers_historic.find_all(href=re.compile('.*.zip'))]:
      for a in page:
コード例 #56
0
ファイル: pipeline.py プロジェクト: FDA/openfda
 def output(self):
   '''Returns a luigi.LocalTarget for config.data_dir('device_pma/json.db').'''
   json_db = config.data_dir('device_pma/json.db')
   return luigi.LocalTarget(json_db)
コード例 #57
0
ファイル: pipeline.py プロジェクト: FDA/openfda
import glob
import logging
import os
import sys
import traceback

import arrow
import luigi
import xmltodict
from openfda import common, config, index_util, parallel
from openfda.device_harmonization.pipeline import (Harmonized2OpenFDA,
                                                   DeviceAnnotateMapper)

DEVICE_UDI_BUCKET = 's3://cdrh-data'
DEVICE_UDI_LOCAL_DIR = config.data_dir('device_udi/s3_sync')
BATCH = arrow.utcnow().floor('day').format('YYYYMMDD')
AWS_CLI = 'aws'


class SyncS3DeviceUDI(luigi.Task):
  bucket = DEVICE_UDI_BUCKET
  local_dir = DEVICE_UDI_LOCAL_DIR
  aws = AWS_CLI

  def flag_file(self):
    '''Returns the path of the '.last_sync_time' file inside self.local_dir.'''
    marker_name = '.last_sync_time'
    return os.path.join(self.local_dir, marker_name)

  def complete(self):
    # Only run S3 sync once per day.
    if config.disable_downloads():