Example #1
 def _caller(*args, **kwargs):
     source_name = func.__module__.split('.')[-1]
     country = args[0] if (len(args) > 0
                           and isinstance(args[0], Country)) else None
     try:
         with transaction.atomic():
             return func(*args, **kwargs)
     except Exception as e:
         # Log error to cronjob
         CronJob.sync_cron({
             'name': source_name,
             'message': (
                 f'Error querying {source_name}.'
                 + (f' For Country: {country}.' if country else '')
                 + '\n\n' + traceback.format_exc()
             ),
             'status': CronJobStatus.ERRONEOUS,
         })
         logger.error(
             f'Failed to load <{source_name}:{func.__name__}>'
             + (f' For Country: {country}' if country else '')
             + (f' {error_message}' if error_message else ''),
             exc_info=True,
         )
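
The fragment above is only the inner wrapper of a decorator: func, error_message, Country, CronJob and CronJobStatus all come from an enclosing scope that is not shown. A minimal sketch of a plausible enclosing decorator factory (the name catch_error and the exact signature are assumptions, not necessarily the project's actual code):

import functools


def catch_error(error_message=None):  # name and signature assumed for illustration
    def _decorator(func):
        @functools.wraps(func)
        def _caller(*args, **kwargs):
            # ... error-handling body exactly as shown in Example #1 above ...
            return func(*args, **kwargs)
        return _caller
    return _decorator


# Hypothetical usage on one of the loader functions from the later examples:
# @catch_error(error_message='when loading the INFORM feed')
# def load(country, overview, data):
#     ...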
Example #2
    def handle(self, *args, **options):
        logger.info('Starting appeals ingest')
        new, modified, bilaterals = self.get_new_or_modified_appeals()
        logger.info('%s current appeals' % Appeal.objects.all().count())
        logger.info('Creating %s new appeals' % len(new))
        logger.info('Updating %s existing appeals that have been modified' %
                    len(modified))

        num_created = 0
        for i, r in enumerate(new):
            fields = self.parse_appeal_record(r, is_new_appeal=True)
            # Correction of the appeal record due to the appealbilaterals API
            if fields['code'] in bilaterals:
                fields['amount_funded'] += round(bilaterals[fields['code']], 1)
            try:
                Appeal.objects.create(**fields)
            except Exception as e:
                logger.error(str(e)[:100])
                logger.error('Could not create appeal with code %s' %
                             fields['code'])
                continue
            num_created = num_created + 1

        num_updated = 0
        for i, r in enumerate(modified):
            fields = self.parse_appeal_record(r, is_new_appeal=False)
            # Correction of the appeal record due to the appealbilaterals API
            if fields['code'] in bilaterals:
                fields['amount_funded'] += round(bilaterals[fields['code']], 1)

            try:
                appeal, created = Appeal.objects.update_or_create(
                    code=fields['code'], defaults=fields)
            except Exception as e:
                logger.error(str(e)[:100])
                logger.error('Could not update appeal with code %s' %
                             fields['code'])
                continue
            num_updated = num_updated + 1

        CronJobSum = Appeal.objects.all().count()
        logger.info('%s appeals created' % num_created)
        logger.info('%s appeals updated' % num_updated)
        logger.info('%s total appeals' % CronJobSum)
        logger.info('Appeals ingest completed')

        body = {
            "name": "ingest_appeals",
            "message": 'Appeals ingest completed, %s total appeals (%s new, %s existing).'
                       % (CronJobSum, num_created, num_updated),
            "num_result": CronJobSum,
            "status": CronJobStatus.SUCCESSFUL
        }
        CronJob.sync_cron(body)
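
handle() above is the entry point of a Django management command. Assuming the command module is named ingest_appeals (the name used for the CronJob record above), it can be triggered from a scheduler or from other code via Django's call_command; a minimal sketch:

from django.core.management import call_command

# Equivalent to running `python manage.py ingest_appeals` on the command line;
# the command name is assumed from the "ingest_appeals" CronJob entry above.
call_command('ingest_appeals')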
Example #3
def prefetch():
    data = {}

    url = API_ENDPOINT
    page = 1
    now = datetime.datetime.now()
    daterange = f'{now.year - 10}:{now.year}'
    while True:
        # TODO: lastupdated
        rs = requests.get(f'{url}?date={daterange}', params={
            'format': 'json',
            'source': 50,
            'per_page': 5000 - 1,  # WD throws error on 5000
            'page': page,
        })
        if rs.status_code != 200:
            body = { "name": "WB", "message": "Error querying WorldBank feed at " + url, "status": CronJobStatus.ERRONEOUS } # not every case is catched here, e.g. if the base URL is wrong...
            CronJob.sync_cron(body)
            return data
        rs = rs.json()

        for pop_data in rs[1]:
            geo_code = pop_data['country']['id']
            pop = pop_data['value']
            year = pop_data['date']
            if len(geo_code) == 3:   # Admin Level 0
                pcountry = get_country_by_iso3(geo_code)
                if pcountry is None:
                    continue
                geo_id = pcountry.alpha_2
            else:  # Should be Admin Level 1
                # NOTE: District code's structure is <ISO2>_<Number>, so using ISO2
                geo_code = geo_code[-6:]
                pcountry = get_country_by_iso3(geo_code[:3])
                if pcountry is None:
                    continue
                iso2 = pcountry.alpha_2
                geo_id = f'{iso2}{geo_code[3:]}'

            geo_id = geo_id.upper()
            if data.get(geo_id) is None or data.get(geo_id)[1] < year:
                data[geo_id] = (pop, year)

        if page >= rs[0]['pages']:
            break
        page += 1
    body = { "name": "WB", "message": "Done querying WorldBank feed at " + url, "num_result": len(data), "status": CronJobStatus.SUCCESSFUL }
    CronJob.sync_cron(body)
    return data
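
The function returns a mapping from upper-case geo codes (ISO2 for countries, ISO2 plus a district suffix for admin level 1) to the most recent (population, year) pair found in the feed. A minimal, hypothetical consumer of that structure (values shown are illustrative only):

population_data = prefetch()
# e.g. {'HU': (9700000, '2021'), ...}  -- country-level keys; district keys carry an extra suffix
for geo_id, (population, year) in population_data.items():
    print(f'{geo_id}: population {population} (year {year})')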
Example #4
def _crises_event_prefetch():
    query_params = json.dumps({
        'limit': 1000,
        'filter': {
            'operator': 'AND',
            'conditions': [
                {
                    'field': 'primary_type.code',
                    'value': [type_code for type_code, _ in PastCrisesEvent.CHOICES],
                    'operator': 'OR'
                }
            ]
        },
        'fields': {
            'include': ['date.created', 'primary_country.iso3', 'primary_type.code']
        }
    })

    url = DISASTER_API
    data = {}
    while True:
        response = requests.post(url, data=query_params)
        if response.status_code != 200:
            body = { "name": "RELIEFWEB", "message": "Error querying ReliefWeb crisis event feed at " + url, "status": CronJobStatus.ERRONEOUS } # not every case is catched here, e.g. if the base URL is wrong...
            CronJob.sync_cron(body)
        response = response.json()
        for disaster in response['data']:
            disaster = disaster['fields']
            iso3 = disaster['primary_country']['iso3'].upper()
            pcountry = get_country_by_iso3(iso3)
            if pcountry is None:
                continue
            iso2 = pcountry.alpha_2
            dt = parse_date(disaster['date']['created'])
            disaster_data = {
                'event': disaster['primary_type']['code'],
                'year': dt.year,
                'month': dt.month,
            }
            if data.get(iso2) is None:
                data[iso2] = [disaster_data]
            else:
                data[iso2].append(disaster_data)

        if 'next' not in response['links']:
            break
        url = response['links']['next']['href']
    return data
Example #5
    def load(self):
        """
        Load data for Databank from specified sources
        """
        source_prefetch_data = {}

        # Prefetch Data
        print('\nPrefetching from sources:: ')
        for source, name in SOURCES:
            if hasattr(source, 'prefetch'):
                start = datetime.datetime.now()
                print(f'\t -> {name}', end='')
                source_prefetch_data[source.__name__] = source.prefetch()
                print(f' [{datetime.datetime.now() - start}]')

        # Load
        print('\nLoading Sources data into GO DB:: ')
        for source, name in SOURCES:
            if hasattr(source, 'global_load'):
                print(f'\t -> {name}', end='')
                start = datetime.datetime.now()
                source.global_load(source_prefetch_data.get(source.__name__))
                print(f' [{datetime.datetime.now() - start}]')

        index, country_count = 1, Country.objects.count()
        print('\nLoading Sources data for each country to GO DB:: ')
        for country in Country.objects.prefetch_related('countryoverview').all():
            print(f'\t -> ({index}/{country_count}) {country}')
            overview = (
                country.countryoverview if hasattr(country, 'countryoverview') else
                CountryOverview.objects.create(country=country)
            )
            overview.script_modified_at = timezone.now()
            for source, name in SOURCES:
                if hasattr(source, 'load'):
                    print(f'\t\t -> {name}', end='')
                    # Load For each country
                    source_data = source_prefetch_data.get(source.__name__)
                    start = datetime.datetime.now()
                    source.load(country, overview, source_data)
                    print(f' [{datetime.datetime.now() - start}]')
            overview.save()
            index += 1
        # This source cannot be checked/logged via prefetch, so we do it here, after the "load".
        if name == 'FTS_HPC':
            body = {
                "name": name,
                "message": "Done querying " + name + " data feeds",
                "num_result": index,
                "status": CronJobStatus.SUCCESSFUL,
            }
            CronJob.sync_cron(body)
Example #6
def prefetch():
    inform_data = {}
    response_d = requests.get(INFORM_API_ENDPOINT)
    if response_d.status_code != 200:
        # Because this happens too often, the status is set to WARNED, though it should be ERRONEOUS.
        # Not every failure is caught here, e.g. if the base URL is wrong...
        body = {
            "name": "INFORM",
            "message": "Error querying Inform feed at " + INFORM_API_ENDPOINT,
            "status": CronJobStatus.WARNED
        }
        CronJob.sync_cron(body)
        return inform_data
    response_d = response_d.json()

    for index, i_data in enumerate(response_d):
        iso3 = i_data['Iso3']
        pcountry = get_country_by_iso3(iso3)
        if pcountry is None:
            continue

        indicator_id = i_data['IndicatorId']
        score = i_data['IndicatorScore']
        entry = {
            'id': index + 1,
            'indicator': indicator_id,
            'group': InformIndicator.get_group(indicator_id),
            'score': score,
            'indicator_display': InformIndicator.LABEL_MAP.get(indicator_id),
            'group_display': InformIndicator.get_group_display(indicator_id),
        }

        # Assuming indicator data are unique from the API
        if inform_data.get(pcountry.alpha_2) is None:
            inform_data[pcountry.alpha_2] = [entry]
        else:
            inform_data[pcountry.alpha_2].append(entry)

    body = {
        "name": "INFORM",
        "message": "Done querying Inform feed at " + INFORM_API_ENDPOINT,
        "num_result": len(inform_data),
        "status": CronJobStatus.SUCCESSFUL
    }
    CronJob.sync_cron(body)
    return inform_data
Example #7
def prefetch():
    data = {}
    rs = requests.get(API_ENDPOINT)
    if rs.status_code != 200:
        body = {
            "name": "START_NETWORK",
            "message": "Error querying StartNetwork feed at " + API_ENDPOINT,
            "status": CronJobStatus.ERRONEOUS
        }  # not every case is caught here, e.g. if the base URL is wrong...
        CronJob.sync_cron(body)
        return data
    rs = rs.text.splitlines()
    CronJobSum = 0
    for row in csv.DictReader(rs):
        # Some values look like `Congo [DRC]`
        country = get_country_by_name(row['Country'].split('[')[0].strip())
        date = parse_alert_date(row['Alert date'])
        if country is None or date is None:
            continue
        iso2 = country.alpha_2
        alert_data = {
            'date': date.isoformat(),
            'alert': row['Alert'],
            'alert_type': row['Alert type'],
            'amount_awarded': parse_amount(row['Amount Awarded']),
            'crisis_type': row['Crisis Type'],
        }

        if data.get(iso2) is None:
            data[iso2] = [alert_data]
        else:
            data[iso2].append(alert_data)
        CronJobSum += 1
    body = {
        "name": "START_NETWORK",
        "message": "Done querying StartNetwork feed at " + API_ENDPOINT,
        "num_result": CronJobSum,
        "status": CronJobStatus.SUCCESSFUL
    }
    CronJob.sync_cron(body)
    return data
Example #8
 def run(self):
     try:
         server = smtplib.SMTP(settings.EMAIL_HOST, settings.EMAIL_PORT)
         server.ehlo()
         server.starttls()
         server.ehlo()
         succ = server.login(settings.EMAIL_USER, settings.EMAIL_PASS)
         if 'successful' not in str(succ[1]):
             cron_rec = {
                 "name": "notification",
                 "message": 'Error contacting ' + settings.EMAIL_HOST +
                 ' smtp server for notifications',
                 "status": CronJobStatus.ERRONEOUS
             }
             CronJob.sync_cron(cron_rec)
         if len(self.recipients) > 0:
             server.sendmail(settings.EMAIL_USER, self.recipients,
                             self.msg.as_string())
         server.quit()
         logger.info('E-mails were sent successfully.')
     except Exception as exc:
         logger.error(
             'Could not send emails with Python smtplib, exception: {} -- {}'
             .format(type(exc).__name__, exc.args))
         ex = ''
         try:
             ex = str(exc.args)
         except Exception as exctwo:
             logger.error(exctwo.args)
         cron_rec = {
             "name": "notification",
             "message": 'Error sending out email with Python smtplib: {}'.format(ex),
             "status": CronJobStatus.ERRONEOUS
         }
         CronJob.sync_cron(cron_rec)
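
Example #13 below starts this sender with SendMail(to_addresses, msg).start(), which implies a threading.Thread subclass that keeps the recipients and the prepared MIME message on the instance. A minimal sketch of the assumed class shell around the run() method above:

import threading


class SendMail(threading.Thread):
    # Assumed shell for illustration; run() is the method shown in Example #8.
    def __init__(self, recipients, msg):
        super().__init__()
        self.recipients = recipients
        self.msg = msg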
Example #9
def prefetch():
    fdrs_entities = requests.get(FDRS_NS_API_ENDPOINT, headers=FDRS_HEADERS)

    if fdrs_entities.status_code != 200:
        # Not every failure is caught here, e.g. if the base URL is wrong...
        body = {
            "name": "FDRS",
            "message": "Error querying FDRS NS API feed at " + FDRS_NS_API_ENDPOINT,
            "status": CronJobStatus.ERRONEOUS
        }
        CronJob.sync_cron(body)
        return {}
    fdrs_entities = fdrs_entities.json()

    ns_iso_map = {
        # ISO3 is missing for some entries in FDRS, and IFRC-GO only has ISO2 for countries
        ns['KPI_DON_code']: ns['iso_2']
        for ns in fdrs_entities
    }

    body = {
        "name": "FDRS",
        "message": "Done querying FDRS NS API feed at " + FDRS_NS_API_ENDPOINT,
        "num_result": len(ns_iso_map),
        "status": CronJobStatus.SUCCESSFUL
    }
    CronJob.sync_cron(body)

    return {
        # KEY <ISO2>-<Indicator_ID>: {year: '', value: ''}
        f"{ns_iso_map[ns_data['id']].upper()}-{indicator_data['id']}":
            (ns_data['data'][-1] if ns_data['data'] else None)
        for indicator_data in requests.get(FDRS_DATA_API_ENDPOINT, headers=FDRS_HEADERS).json()['data']
        for ns_data in indicator_data['data']
    }
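
The final comprehension returns one entry per National Society and indicator, keyed <ISO2>-<Indicator_ID> and holding the latest data point, or None when the series is empty. An illustrative (entirely made-up) slice of the result:

example_result = {
    'AF-SomeIndicatorId': {'year': '2020', 'value': 42},  # latest data point for that NS/indicator
    'AL-SomeIndicatorId': None,  # indicator with no data points for that NS
}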
Example #10
    def handle(self, *args, **options):
        logger.info('Starting GDACs ingest')
        # get latest
        nspace = '{http://www.gdacs.org}'
        url = 'http://www.gdacs.org/xml/rss_7d.xml'
        response = requests.get(url)
        if response.status_code != 200:
            text_to_log = 'Error querying GDACS xml feed at ' + url
            logger.error(text_to_log)
            logger.error(response.content)
            # Not every failure is caught here, e.g. if the base URL is wrong...
            body = {
                "name": "ingest_gdacs",
                "message": text_to_log,
                "status": CronJobStatus.ERRONEOUS
            }
            CronJob.sync_cron(body)
            raise Exception('Error querying GDACS')

        # get as XML
        xml2dict = XML2Dict()
        results = xml2dict.parse(response.content)
        levels = {'Orange': 1, 'Red': 2}
        added = 0
        for alert in results['rss']['channel']['item']:
            alert_level = alert['%salertlevel' % nspace].decode('utf-8')
            if alert_level in levels.keys():
                latlon = alert['{http://www.georss.org/georss}point'].decode(
                    'utf-8').split()
                eid = alert.pop(nspace + 'eventid')
                alert_score = alert[nspace + 'alertscore'] if (
                    nspace + 'alertscore') in alert else None
                data = {
                    'title': alert.pop('title'),
                    'description': alert.pop('description'),
                    'image': alert.pop('enclosure'),
                    'report': alert.pop('link'),
                    'publication_date': parse(alert.pop('pubDate')),
                    'year': alert.pop(nspace + 'year'),
                    'lat': latlon[0],
                    'lon': latlon[1],
                    'event_type': alert.pop(nspace + 'eventtype'),
                    'alert_level': levels[alert_level],
                    'alert_score': alert_score,
                    'severity': alert.pop(nspace + 'severity'),
                    'severity_unit': alert['@' + nspace + 'severity']['unit'],
                    'severity_value': alert['@' + nspace + 'severity']['value'],
                    'population_unit': alert['@' + nspace + 'population']['unit'],
                    'population_value': alert['@' + nspace + 'population']['value'],
                    'vulnerability': alert['@' + nspace + 'vulnerability']['value'],
                    'country_text': alert.pop(nspace + 'country'),
                }

                # do some length checking
                for key in ['event_type', 'alert_score', 'severity_unit',
                            'severity_value', 'population_unit', 'population_value']:
                    if len(data[key]) > 16:
                        data[key] = data[key][:16]
                data = {
                    k: v.decode('utf-8') if isinstance(v, bytes) else v
                    for k, v in data.items()
                }
                gdacsevent, created = GDACSEvent.objects.get_or_create(
                    eventid=eid, defaults=data)
                if created:
                    added += 1
                    for c in data['country_text'].split(','):
                        country = Country.objects.filter(name=c.strip())
                        if country.count() == 1:
                            gdacsevent.countries.add(country[0])

                    title_elements = ['GDACS %s:' % alert_level]
                    for field in ['country_text', 'event_type', 'severity']:
                        if data[field] is not None:
                            title_elements.append(str(data[field]))
                    title = (' ').join(title_elements)

                    # make sure we don't exceed the 100 character limit
                    if len(title) > 97:
                        title = '%s...' % title[:97]

                    fields = {
                        'name': title,
                        'summary': data['description'],
                        'disaster_start_date': data['publication_date'],
                        'auto_generated': True,
                        'auto_generated_source': SOURCES['gdacs'],
                        'ifrc_severity_level': data['alert_level'],
                    }
                    event = Event.objects.create(**fields)
                    # add countries
                    for c in gdacsevent.countries.all():
                        event.countries.add(c)

        text_to_log = '%s GDACs events added' % added
        logger.info(text_to_log)
        body = {
            "name": "ingest_gdacs",
            "message": text_to_log,
            "num_result": added,
            "status": CronJobStatus.SUCCESSFUL
        }
        CronJob.sync_cron(body)
Example #11
    def handle(self, *args, **options):
        logger.info('Starting appeal document ingest')

        # v smoke test
        baseurl = 'https://www.ifrc.org/appeals/'  # no more ...en/publications-and-reports...
        # stackoverflow.com/questions/36516183/what-should-i-use-to-open-a-url-instead-of-urlopen-in-urllib3
        http = PoolManager()
        smoke_response = http.request('GET', baseurl)
        joy_to_the_world = False
        if smoke_response.status == 200:
            joy_to_the_world = True  # We log the success later, when we know the numeric results.
        else:
            body = {
                "name": "ingest_appeal_docs",
                "message": f'Error ingesting appeals_docs on url: {baseurl}, error_code: {smoke_response.status}',
                "status": CronJobStatus.ERRONEOUS
            }
            CronJob.sync_cron(body)
        # ^ smoke test

        if options['fullscan']:
            # If the `--fullscan` option is passed (at the end of the command), check ALL appeals. Runs for about an hour!
            print('Doing a full scan of all Appeals')
            qset = Appeal.objects.all()
        else:
            # By default, only check appeals from the past 6 months
            now = datetime.now().replace(tzinfo=timezone.utc)
            six_months_ago = now - relativedelta(months=6)
            # This was the original qset, but it wouldn't get newer docs for the same Appeals
            # qset = Appeal.objects.filter(appealdocument__isnull=True).filter(end_date__gt=six_months_ago)
            qset = Appeal.objects.filter(end_date__gt=six_months_ago)

        # qset = Appeal.objects.filter(code='Something')  # could help debug
        # First get all Appeal Codes
        appeal_codes = [a.code for a in qset]

        # Modify code taken from https://pastebin.com/ieMe9yPc to scrape `publications-and-reports` and find
        # Documents for each appeal code
        output = []
        page_not_found = []
        for code in appeal_codes:
            code = code.replace(' ', '')
            docs_url = f'{baseurl}?appeal_code={code}'  # no more ac={code}&at=0&c=&co=&dt=1&f=&re=&t=&ti=&zo=
            try:
                http = PoolManager()
                response = http.request('GET', docs_url)
            except Exception:  # if we get an error fetching page for an appeal, we ignore it
                page_not_found.append(code)
                continue

            soup = BeautifulSoup(response.data, "lxml")
            div = soup.find('div', class_='row appeals-view__row')
            for t in div.findAll('tbody'):
                output = output + self.makelist(t)

        # Once we have all Documents in output, we add all missing Documents to the associated Appeal
        not_found = []
        existing = []
        created = []

        acodes = list(set([a['appealcode'] for a in output]))
        for code in acodes:
            try:
                appeal = Appeal.objects.get(code=code)
            except ObjectDoesNotExist:
                not_found.append(code)
                continue

            existing_docs = list(appeal.appealdocument_set.all())
            docs = [a for a in output if code == a['appealcode']]
            for doc in docs:
                if doc['url'].startswith('/'):  # can be /docs or /sites also
                    doc['url'] = f'https://www.ifrc.org{doc["url"]}'
                    # href only contains relative path to the document if it's available at the ifrc.org site
                exists = any(a.document_url == doc['url'] for a in existing_docs)
                if exists:
                    existing.append(doc['url'])
                else:
                    try:
                        created_at = self.parse_date(doc['date'])
                    except Exception:
                        created_at = None

                    AppealDocument.objects.create(
                        document_url=doc['url'],
                        name=doc['appealtype'],  # not ['name'], because that holds the appeal's name
                        created_at=created_at,
                        appeal=appeal,
                    )
                    created.append(doc['url'])
        text_to_log = []
        text_to_log.append('%s appeal documents created' % len(created))
        text_to_log.append('%s existing appeal documents' % len(existing))
        text_to_log.append('%s pages not found for appeal' %
                           len(page_not_found))

        for t in text_to_log:
            logger.info(t)
            # body = { "name": "ingest_appeal_docs", "message": t, "status": CronJobStatus.SUCCESSFUL }
            # CronJob.sync_cron(body)

        if len(not_found):
            t = '%s documents without appeals in system' % len(not_found)
            logger.warning(t)
            body = {
                "name": "ingest_appeal_docs",
                "message": t,
                "num_result": len(not_found),
                "status": CronJobStatus.WARNED
            }
            CronJob.sync_cron(body)

        if joy_to_the_world:
            body = {
                "name": "ingest_appeal_docs",
                "message": (f'Done ingesting appeals_docs on url {baseurl},'
                            f' {len(created)} appeal document(s) were created,'
                            f' {len(existing)} already exist,'
                            f' {len(page_not_found)} not found'),
                "num_result": len(created),
                "status": CronJobStatus.SUCCESSFUL
            }
            CronJob.sync_cron(body)
Example #12
    def load(self):
        """
        Load data for Databank from specified sources
        """
        source_prefetch_data = {}

        # Prefetch Data
        try:
            print('\nPrefetching from sources:: ')
            for source, name in SOURCES:
                if hasattr(source, 'prefetch'):
                    start = datetime.datetime.now()
                    print(f'\t -> {name}', end='')
                    prefetch_response = source.prefetch()
                    if prefetch_response is not None:
                        prefetched_data, item_count, sources = prefetch_response
                        source_prefetch_data[source.__name__] = prefetched_data
                        # Log successful prefetch
                        CronJob.sync_cron({
                            'name': name,
                            'message': (
                                f'Done querying {name}'
                                + (f' using sources: {sources}' if sources else '')
                            ),
                            'num_result': item_count,
                            'status': CronJobStatus.SUCCESSFUL,
                        })
                    print(f' [{datetime.datetime.now() - start}]')
        except Exception as ex:
            CronJob.sync_cron({
                'name': 'ingest_databank',
                'message': f'Could not prefetch from sources\n\nException:\n{str(ex)}',
                'status': CronJobStatus.ERRONEOUS,
            })

        # Load
        try:
            print('\nLoading Sources data into GO DB:: ')
            for source, name in SOURCES:
                if hasattr(source, 'global_load'):
                    print(f'\t -> {name}', end='')
                    start = datetime.datetime.now()
                    source.global_load(source_prefetch_data.get(source.__name__))
                    print(f' [{datetime.datetime.now() - start}]')

            index, country_count = 1, Country.objects.count()
            print('\nLoading Sources data for each country to GO DB:: ')
            for country in Country.objects.prefetch_related('countryoverview').all():
                print(f'\t -> ({index}/{country_count}) {country}')
                overview = (
                    country.countryoverview if hasattr(country, 'countryoverview')
                    else CountryOverview.objects.create(country=country)
                )
                overview.script_modified_at = timezone.now()
                for source, name in SOURCES:
                    if hasattr(source, 'load'):
                        print(f'\t\t -> {name}', end='')
                        # Load For each country
                        source_data = source_prefetch_data.get(source.__name__)
                        start = datetime.datetime.now()
                        source.load(country, overview, source_data)
                        print(f' [{datetime.datetime.now() - start}]')
                overview.save()
                index += 1
            # This source can not be checked/logged via prefetch, that is why we do it here, after the "load".
            if name == 'FTS_HPC':
                CronJob.sync_cron({
                    'name': name,
                    'message': f'Done querying {name} data feeds',
                    'num_result': index,
                    "status": CronJobStatus.SUCCESSFUL,
                })
        except Exception as ex:
            CronJob.sync_cron({
                'name': 'ingest_databank',
                'message': f'Could not load all data\n\nException:\n{str(ex)}',
                'status': CronJobStatus.ERRONEOUS,
            })
Example #13
def send_notification(subject, recipients, html, mailtype='', files=None):
    """ Generic email sending method, handly only HTML emails currently """
    if not settings.EMAIL_USER or not settings.EMAIL_API_ENDPOINT:
        logger.warning(
            'Cannot send notifications.\n'
            'No username and/or API endpoint set as environment variables.')
        if settings.DEBUG:
            print('-' * 22, 'EMAIL START', '-' * 22)
            print(
                f'subject={subject}\nrecipients={recipients}\nhtml={html}\nmailtype={mailtype}'
            )
            print('-' * 22, 'EMAIL END -', '-' * 22)
        return
    if settings.DEBUG_EMAIL:
        print('-' * 22, 'EMAIL START', '-' * 22)
        print(f'\n{html}\n')
        print('-' * 22, 'EMAIL END -', '-' * 22)

    # If it's not PROD, only the test e-mail addresses set in the env var can be used
    to_addresses = recipients if isinstance(recipients, list) else [recipients]

    if not IS_PROD:
        logger.info('Using test email addresses...')
        to_addresses = []
        logger.info(to_addresses)
        for eml in settings.TEST_EMAILS:

            # It is possible to filter test addressees to domain name only – not used.
            is_dom = '@' not in eml
            if is_dom:
                for rec in recipients:
                    try:
                        if eml == rec.split('@')[1]:
                            to_addresses.append(rec)
                    except Exception:
                        logger.info(
                            'Could not extract domain from: {}'.format(rec))
            elif eml and (eml in recipients):
                to_addresses.append(eml)

    recipients_as_string = ','.join(to_addresses)
    if not recipients_as_string:
        if len(to_addresses) > 0:
            warn_msg = 'Recipients failed to be converted to string, 1st rec.: {}'.format(
                to_addresses[0])
            logger.info(warn_msg)
            # Save the warning into the CronJob logs too
            cron_error = {
                "name": "index_and_notify",
                "message": warn_msg,
                "status": CronJobStatus.WARNED
            }
            CronJob.sync_cron(cron_error)
        else:
            logger.info('Recipients string is empty')
        return  # If there are no recipients it's unnecessary to send out the email

    # Encode with base64 into bytes, then converting it back to strings for the JSON
    payload = {
        "FromAsBase64": str(base64.b64encode(settings.EMAIL_USER.encode('utf-8')), 'utf-8'),
        "ToAsBase64": str(base64.b64encode(EMAIL_TO.encode('utf-8')), 'utf-8'),
        "CcAsBase64": "",
        "BccAsBase64": str(base64.b64encode(recipients_as_string.encode('utf-8')), 'utf-8'),
        "SubjectAsBase64": str(base64.b64encode(subject.encode('utf-8')), 'utf-8'),
        "BodyAsBase64": str(base64.b64encode(html.encode('utf-8')), 'utf-8'),
        "IsBodyHtml": True,
        "TemplateName": "",
        "TemplateLanguage": ""
    }

    # The response contains the GUID (res.text)
    res = requests.post(settings.EMAIL_API_ENDPOINT, json=payload)
    res_text = res.text.replace('"', '')

    if res.status_code == 200:
        logger.info(u'Subject: {subject}, Recipients: {recs}'.format(
            subject=subject, recs=recipients_as_string))

        logger.info('GUID: {}'.format(res_text))
        # Saving GUID into a table so that the API can be queried with it to get info about
        # if the actual sending has failed or not.
        NotificationGUID.objects.create(
            api_guid=res_text,
            email_type=mailtype,
            to_list=f'To: {EMAIL_TO}; Bcc: {recipients_as_string}')

        logger.info('E-mails were sent successfully.')
    elif res.status_code == 401 or res.status_code == 403:
        # Try sending with Python smtplib, if reaching the API fails
        logger.error(
            f'Authorization/authentication failed ({res.status_code}) to the e-mail sender API.'
        )
        msg = construct_msg(subject, html, files)
        SendMail(to_addresses, msg).start()
    else:
        # Try sending with Python smtplib, if reaching the API fails
        logger.error(
            'Could not reach the e-mail sender API. Trying with Python smtplib...'
        )
        msg = construct_msg(subject, html, files)
        SendMail(to_addresses, msg).start()

    return res.text
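
A minimal, hypothetical call of the helper above (addresses and content are placeholders):

send_notification(
    subject='Weekly digest',
    recipients=['someone@example.org'],
    html='<p>Hello from GO</p>',
    mailtype='weekly_digest',
)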
Example #14
    def handle(self, *args, **options):

        guids = [
            e.auto_generated_source for e in Event.objects.filter(
                auto_generated_source__startswith='www.who.int')
        ]

        logger.info('Querying WHO RSS feed for new emergency data')
        # get latest
        nspace = '{https://www.who.int}'
        ur2 = []
        ur2.append('https://www.who.int/feeds/entity/csr/don/en/rss.xml')
        ur2.append('https://www.who.int/feeds/entity/hac/en/rss.xml')

        for index, url in enumerate(ur2):
            response = requests.get(url)
            if response.status_code != 200:
                text_to_log = 'Error querying WHO xml feed at ' + url
                logger.error(text_to_log)
                logger.error(response.content)
                body = {
                    "name": "ingest_who",
                    "message": text_to_log,
                    "status": CronJobStatus.ERRONEOUS
                }  # not every case is caught here, e.g. if the base URL is wrong...
                CronJob.sync_cron(body)
                raise Exception('Error querying WHO')

            # get as XML, but do not use the obsolete xml2dict = XML2Dict(); use xmltodict instead
            results = xmltodict.parse(response.content)
            added = 0
            # lastBuildDate = results['rss']['channel']['lastBuildDate']
            # managingEditor = results['rss']['channel']['managingEditor']
            for row in results['rss']['channel']['item']:
                data = {
                    'title': row.pop('title'),
                    'link': row.pop('link'),
                    'description': row.pop('description'),
                    'guid': row['guid']['#text'],
                    'isPermaLink': row['guid']['@isPermaLink'],
                    'category': row.pop('category'),
                    'pubDate': row.pop('pubDate'),
                }
                if data['guid'] in guids:
                    continue
                if data['guid'] in ['WeDontWantThis', 'NeitherThis']:
                    continue

                title = data['title']  # for csr link
                short = title.replace(' (ex-China)', '')
                pos = short.find(' – ')
                region = None
                country = None
                if pos == -1:
                    pos = short.find(' - ')
                if pos > 0:
                    country = short[pos + 3:]  # cut the part after " – " or " - "
                else:
                    country = 'DashNotFoundInTitle'
                if country == 'Democratic Republic of the Congo':  #replacement
                    country = 'Congo, Dem. Rep.'
                elif country == 'Argentine Republic':
                    country = 'Argentina'
                elif country == 'Republic of Panama':
                    country = 'Panama'
                elif country == 'Islamic Republic of Pakistan':
                    country = 'Pakistan'
                elif country[:4] == 'the ':
                    country = country[4:]
                elif index == 1:  # for 'hac' category. See link for 'hac' above
                    hac_category = data['category']

                    # Searching for the given country
                    end = hac_category.find('[country]')
                    if end > 0:
                        start = hac_category[:end - 1].rfind(',', 0)  # search backwards for the comma
                        country = hac_category[start + 2:end - 1]  # the part after the comma is the country
                    else:
                        country = 'CountryNotFoundInCategory'  # Will not be found via filtering
                    # Searching for the given region
                    end = hac_category.find('[region]')
                    if end > 0:
                        start = hac_category[:end - 1].rfind(',', 0)  # search backwards for the comma
                        region_name = hac_category[start + 2:end - 1]  # the part after the comma is the region
                        if 'Afr' in region_name:  # Keep synchronised with https://github.com/IFRCGo/go-api/blob/master/api/models.py#L38-L42
                            region = 0
                        elif 'Ame' in region_name:
                            region = 1
                        elif 'As' in region_name:
                            region = 2
                        elif 'Eu' in region_name:
                            region = 3
                        elif 'MENA' in region_name:
                            region = 4
                        else:  # search for region that is joined to country (later)...
                            region = None

                # make sure we don't exceed the 100 character limit
                if len(title) > 99:
                    title = '%s...' % title[:99]
                date = parse(data['pubDate'])
                if data['category'] == 'news':
                    alert_level = 1
                else:
                    alert_level = 2
                if "Ebola" in title or "virus" in title or "fever" in title:
                    alert_level = 2
                elif index == 1:
                    alert_level = 0

                if data['category'] == 'news':
                    summary = data['description']
                else:
                    summary = data['description'] + ' (' + data['category'] + ')'

                fields = {
                    'name': title,
                    'summary': summary,
                    'disaster_start_date': date,
                    'auto_generated': True,
                    'auto_generated_source': data['guid'],
                    'ifrc_severity_level': alert_level,
                }
                # TODO: fields['name'] sometimes exceeds 100 maxlength, so will need some altering if this will be used
                event = Event.objects.create(**fields)
                added += 1

                # add country
                country_found = Country.objects.filter(name=country.strip())
                if country_found.count() >= 1:
                    event.countries.add(country_found[0])
                else:
                    country_word_list = country.split()  # list of country words
                    # Search only on the last word, e.g. "Republic of Panama" > "Panama"
                    country_found = Country.objects.filter(
                        name=country_word_list[-1].strip())
                    if country_found.count() >= 1:
                        event.countries.add(country_found[0])

                # add region
                # print(country)
                if (region is None) and (country_found.count() > 0) and (
                        country != 'CountryNotFoundInCategory'):
                    region = country_found[0].region_id
                if region is not None:
                    event.regions.add(region)

            text_to_log = "{} WHO messages added, URL-{}".format(
                added, index + 1)
            logger.info(text_to_log)

            # Database CronJob logging
            body = {
                "name": "ingest_who",
                "message": text_to_log,
                "num_result": added,
                "storing_days": 6,
                "status": CronJobStatus.SUCCESSFUL
            }

            # ... via the API - not used from here, but it can be useful from the front-end:
            #resp = requests.post(api_url + '/api/v2/add_cronjob_log/', body, headers={'CONTENT_TYPE': 'application/json'})

            # ... via a direct write-in:
            CronJob.sync_cron(body)
Example #15
    def handle(self, *args, **options):

        guids = [
            e.auto_generated_source for e in Event.objects.filter(
                auto_generated_source__startswith='www.who.int')
        ]

        logger.info('Querying WHO RSS feed for new emergency data')
        # get latest
        nspace = '{https://www.who.int}'
        ur2 = []
        ur2.append('https://www.who.int/feeds/entity/csr/don/en/rss.xml')
        ur2.append('https://www.who.int/feeds/entity/hac/en/rss.xml')

        for index, url in enumerate(ur2):
            response = requests.get(url)
            if response.status_code != 200:
                text_to_log = 'Error querying WHO xml feed at ' + url
                logger.error(text_to_log)
                logger.error(response.content)
                body = {
                    "name": "ingest_who",
                    "message": text_to_log,
                    "status": CronJobStatus.ERRONEOUS
                }  # not every case is caught here, e.g. if the base URL is wrong...
                CronJob.sync_cron(body)
                raise Exception('Error querying WHO')

            # get as XML
            xml2dict = XML2Dict()
            results = xml2dict.parse(response.content)
            added = 0
            lastBuildDate = results['rss']['channel']['lastBuildDate']
            managingEditor = results['rss']['channel']['managingEditor']

            for row in results['rss']['channel']['item']:
                data = {
                    'title': row.pop('title'),
                    'link': row.pop('link'),
                    'description': row.pop('description'),
                    'guid': row.pop('guid'),
                    #                   '@guid': row.pop('@guid'),  #can not be popped twice
                    'isPermaLink': row.pop('@guid').pop('isPermaLink'),
                    'category': row.pop('category'),
                    'pubDate': row.pop('pubDate'),
                }
                if data['guid'].decode("utf-8") in guids:
                    continue
                if data['guid'].decode("utf-8") in [
                        'WeDontWantThis', 'NeitherThis'
                ]:
                    continue

                title = data['title'].decode("utf-8")  # for csr link
                short = title.replace(' (ex-China)', '')
                pos = short.find(' – ')
                region = None
                country = None
                if pos == -1:
                    pos = short.find(' - ')
                if pos > 0:
                    country = short[pos + 3:]  # cut the part after " – " or " - "
                else:
                    country = 'DashNotFoundInTitle'
                if country == 'Democratic Republic of the Congo':  #replacement
                    country = 'Congo, Dem. Rep.'
                elif country == 'Argentine Republic':
                    country = 'Argentina'
                elif country == 'Republic of Panama':
                    country = 'Panama'
                elif country == 'Islamic Republic of Pakistan':
                    country = 'Pakistan'
                elif index == 1:  # for 'hac' category. See link for 'hac' above
                    hac_category = data['category'].decode("utf-8")

                    # Searching for the given country
                    end = hac_category.find('[country]')
                    if end > 0:
                        start = hac_category[:end - 1].rfind(',', 0)  # search backwards for the comma
                        country = hac_category[start + 2:end - 1]  # the part after the comma is the country
                    else:
                        country = 'CountryNotFoundInCategory'  # Will not be found via filtering
                    # Searching for the given region
                    end = hac_category.find('[region]')
                    if end > 0:
                        start = hac_category[:end - 1].rfind(',', 0)  # search backwards for the comma
                        region_name = hac_category[start + 2:end - 1]  # the part after the comma is the region
                        if 'Afr' in region_name:  # Keep synchronised with https://github.com/IFRCGo/go-api/blob/master/api/models.py#L38-L42
                            region = 0
                        elif 'Ame' in region_name:
                            region = 1
                        elif 'As' in region_name:
                            region = 2
                        elif 'Eu' in region_name:
                            region = 3
                        elif 'MENA' in region_name:
                            region = 4
                        else:  # search for region that is joined to country (later)...
                            region = None

                # make sure we don't exceed the 100 character limit
                if len(title) > 99:
                    title = '%s...' % title[:99]
                date = parse(data['pubDate'].decode("utf-8"))
                if data['category'].decode("utf-8") == 'news':
                    alert_level = 1
                else:
                    alert_level = 2
                if "Ebola" in title or "virus" in title or "fever" in title:
                    alert_level = 2
                elif index == 1:
                    alert_level = 0

                if data['category'].decode("utf-8") == 'news':
                    summary = data['description'].decode("utf-8")
                else:
                    summary = (data['description'].decode("utf-8") + ' ('
                               + data['category'].decode("utf-8") + ')')

                fields = {
                    'name': title,
                    'summary': summary,
                    'disaster_start_date': date,
                    'auto_generated': True,
                    'auto_generated_source': data['guid'].decode("utf-8"),
                    'ifrc_severity_level': alert_level,
                }
                # TODO: fields['name'] sometimes exceeds 100 maxlength, so will need some altering if this will be used
                event = Event.objects.create(**fields)
                added += 1

                # add country
                country_found = Country.objects.filter(name=country.strip())
                if country_found.count() >= 1:
                    event.countries.add(country_found[0])
                else:
                    country_word_list = country.split()  # list of country words
                    # Search only on the last word, e.g. "Republic of Panama" > "Panama"
                    country_found = Country.objects.filter(
                        name=country_word_list[-1].strip())
                    if country_found.count() >= 1:
                        event.countries.add(country_found[0])

                # add region
                # print(country)
                if (region is None) and (country_found.count() > 0) and (
                        country != 'CountryNotFoundInCategory'):
                    region = country_found[0].region_id
                if region is not None:
                    event.regions.add(region)

            text_to_log = "{} WHO messages added, URL-{}".format(
                added, index + 1)
            logger.info(text_to_log)

            # Database CronJob logging
            body = {
                "name": "ingest_who",
                "message": text_to_log,
                "num_result": added,
                "storing_days": 6,
                "status": CronJobStatus.SUCCESSFUL
            }

            # ... via the API - not used from here, but it can be useful from the front-end:
            #resp = requests.post(api_url + '/api/v2/add_cronjob_log/', body, headers={'CONTENT_TYPE': 'application/json'})

            # ... via a direct write-in:
            CronJob.sync_cron(body)
Example #16
    def handle(self, *args, **options):
        logger.info('Starting appeal document ingest')

        # v smoke test
        baseurl = 'https://www.ifrc.org/en/publications-and-reports/appeals/'
        smoke_response = urlopen(baseurl)
        joy_to_the_world = False
        if smoke_response.code == 200:
            joy_to_the_world = True  # We log the success later, when we know the numeric results.
        else:
            body = {
                "name": "ingest_appeal_docs",
                "message": ('Error ingesting appeals_docs on url ' + baseurl +
                            ', error_code: ' + str(smoke_response.code)),
                "status": CronJobStatus.ERRONEOUS
            }
            CronJob.sync_cron(body)
        # ^ smoke test

        if options['fullscan']:
            # If the `--fullscan` option is passed, check ALL appeals
            print('Doing a full scan of all Appeals')
            qset = Appeal.objects.all()
        else:
            # By default, only check appeals for the past 3 months where Appeal Documents is 0
            now = datetime.now()
            three_months_ago = now - relativedelta(months=3)
            # This was the original qset, but it wouldn't get newer docs for the same Appeals
            #qset = Appeal.objects.filter(appealdocument__isnull=True).filter(end_date__gt=three_months_ago)
            qset = Appeal.objects.filter(end_date__gt=three_months_ago)

        # First get all Appeal Codes
        appeal_codes = [a.code for a in qset]

        # Modify code taken from https://pastebin.com/ieMe9yPc to scrape `publications-and-reports` and find
        # Documents for each appeal code
        output = []
        page_not_found = []
        for code in appeal_codes:
            code = code.replace(' ', '')
            docs_url = baseurl + '?ac=' + code + '&at=0&c=&co=&dt=1&f=&re=&t=&ti=&zo='
            try:
                response = urlopen(docs_url)
            except Exception:  # if we get an error fetching the page for an appeal, we ignore it
                page_not_found.append(code)
                continue

            soup = BeautifulSoup(response.read(), "lxml")
            div = soup.find('div', id='cw_content')
            for t in div.findAll('tbody'):
                output = output + self.makelist(t)

        # Once we have all Documents in output, we add all missing Documents to the associated Appeal
        not_found = []
        existing = []
        created = []

        acodes = list(set([a[2] for a in output]))
        for code in acodes:
            try:
                appeal = Appeal.objects.get(code=code)
            except ObjectDoesNotExist:
                not_found.append(code)
                continue

            existing_docs = list(appeal.appealdocument_set.all())
            docs = [a for a in output if a[2] == code]
            for doc in docs:
                # href only contains a relative path when the document is hosted at the ifrc.org site
                if doc[0].startswith('/docs'):
                    doc[0] = 'https://www.ifrc.org' + doc[0]
                exists = any(a.document_url == doc[0] for a in existing_docs)
                if exists:
                    existing.append(doc[0])
                else:
                    try:
                        created_at = self.parse_date(doc[5])
                    except Exception:
                        created_at = None

                    AppealDocument.objects.create(
                        document_url=doc[0],
                        name=doc[4],
                        created_at=created_at,
                        appeal=appeal,
                    )
                    created.append(doc[0])
        text_to_log = []
        text_to_log.append('%s appeal documents created' % len(created))
        text_to_log.append('%s existing appeal documents' % len(existing))
        text_to_log.append('%s appeal pages not found' % len(page_not_found))

        for t in text_to_log:
            logger.info(t)
            # body = { "name": "ingest_appeal_docs", "message": t, "status": CronJobStatus.SUCCESSFUL }
            # CronJob.sync_cron(body)

        if not_found:
            t = '%s documents without appeals in system' % len(not_found)
            logger.warning(t)
            body = {
                "name": "ingest_appeal_docs",
                "message": t,
                "num_result": len(not_found),
                "status": CronJobStatus.WARNED
            }
            CronJob.sync_cron(body)

        if joy_to_the_world:
            body = {
                "name":
                "ingest_appeal_docs",
                "message":
                'Done ingesting appeals_docs on url ' + baseurl +
                ', %s appeal document(s) were created, %s already exist, %s appeal page(s) not found'
                % (len(created), len(existing), len(page_not_found)),
                "num_result":
                len(created),
                "status":
                CronJobStatus.SUCCESSFUL
            }
            CronJob.sync_cron(body)
Beispiel #17
0
def load(country, overview, _):
    pcountry = get_country_by_iso2(country.iso)
    if pcountry is None:
        return
    fts_data = requests.get(FTS_URL.format(pcountry.alpha_3), headers=HEADERS)
    emg_data = requests.get(EMERGENCY_URL.format(pcountry.alpha_3),
                            headers=HEADERS)

    if fts_data.status_code != 200:
        body = {
            "name": "FTS_HPC",
            "message": "Error querying HPC fts data feed at " + FTS_URL,
            "status": CronJobStatus.ERRONEOUS
        }  # not every case is caught here, e.g. if the base URL is wrong...
        CronJob.sync_cron(body)
        return {}
    if emg_data.status_code != 200:
        body = {
            "name": "FTS_HPC",
            "message":
            "Error querying HPC emergency data feed at " + EMERGENCY_URL,
            "status": CronJobStatus.ERRONEOUS
        }  # not every case is caught here, e.g. if the base URL is wrong...
        CronJob.sync_cron(body)
        return {}
    fts_data = fts_data.json()
    emg_data = emg_data.json()

    c_data = {}

    # fundingTotals, pledgeTotals
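    # As used here, the FTS 'report3' payload exposes each of these totals as a list
    # of 'objects'; every entry in the first object's 'objectsBreakdown' carries a
    # year in 'name' and an amount in 'totalFunding'.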
    for fund_area in ['fundingTotals', 'pledgeTotals']:
        fund_area_data = fts_data['data']['report3'][fund_area]['objects']
        if len(fund_area_data) > 0:
            for v in fund_area_data[0]['objectsBreakdown']:
                try:
                    year = int(v['name'])
                    totalFunding = v['totalFunding']
                except ValueError:
                    continue
                if year not in c_data:
                    c_data[year] = {fund_area: totalFunding}
                else:
                    c_data[year][fund_area] = totalFunding

    # numActivations
    CronJobSum = 0
    for v in emg_data['data']:
        try:
            year = datetime.datetime.strptime(
                v['date'].split('T')[0],
                '%Y-%m-%d',
            ).year
        except ValueError:
            continue
        if year not in c_data:
            c_data[year] = {'numActivations': 1}
        else:
            c_data[year]['numActivations'] = c_data[year].get(
                'numActivations', 0) + 1
        CronJobSum += c_data[year]['numActivations']

    overview.fts_data = [{
        'year': year,
        **values,
    } for year, values in c_data.items()]
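    # overview.fts_data ends up as a list of per-year dicts, e.g. (illustrative values):
    # [{'year': 2019, 'fundingTotals': 1234567.0, 'pledgeTotals': 89012.0, 'numActivations': 2}, ...]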
    overview.save()
Beispiel #18
0
    def get_new_or_modified_appeals(self):
        use_local_file = (os.getenv('DJANGO_DB_NAME') == 'test'
                          and os.path.exists('appeals.json'))
        new = []
        modified = []
        if use_local_file:
            # read from static file for development
            logger.info('Using local appeals.json file')
            with open('appeals.json') as f:
                modified = json.loads(f.read())
            logger.info('Using local appealbilaterals.json file')
            with open('appealbilaterals.json') as f:
                records = json.loads(f.read())
                bilaterals = {}
                for r in records:  # code duplication ¤ (see the helper sketch after this method)
                    if r['APP_Code'] and r['AmountCHF']:
                        if r['APP_Code'] in bilaterals:
                            bilaterals[r['APP_Code']] += r['AmountCHF']
                        else:
                            bilaterals[r['APP_Code']] = r['AmountCHF']
        else:
            # get latest BILATERALS
            logger.info('Querying appeals API for new appeal bilaterals data')
            url = 'http://go-api.ifrc.org/api/appealbilaterals'
            auth = (os.getenv('APPEALS_USER'), os.getenv('APPEALS_PASS'))
            response = requests.get(url, auth=auth)
            if response.status_code != 200:
                text_to_log = 'Error querying AppealBilaterals API at ' + url
                logger.error(text_to_log)
                logger.error(response.content)
                body = {
                    "name": "ingest_appeals",
                    "message": text_to_log,
                    "status": CronJobStatus.ERRONEOUS
                }  # not every case is caught here, e.g. if the base URL is wrong...
                CronJob.sync_cron(body)
                raise Exception(text_to_log)

            records = response.json()

            # write the current record file to local disk
            with open('appealbilaterals.json', 'w') as outfile:
                json.dump(records, outfile)

            bilaterals = {}
            for r in records:  # code duplication ¤ (see the helper sketch after this method)
                if r['APP_Code'] and r['AmountCHF']:
                    if r['APP_Code'] in bilaterals:
                        bilaterals[r['APP_Code']] += r['AmountCHF']
                    else:
                        bilaterals[r['APP_Code']] = r['AmountCHF']

            # get latest APPEALS
            logger.info('Querying appeals API for new appeals data')
            url = 'http://go-api.ifrc.org/api/appeals'
            auth = (os.getenv('APPEALS_USER'), os.getenv('APPEALS_PASS'))
            response = requests.get(url, auth=auth)
            if response.status_code != 200:
                logger.error('Error querying Appeals API')
                raise Exception('Error querying Appeals API')
            records = response.json()

            # write the current record file to local disk
            with open('appeals.json', 'w') as outfile:
                json.dump(records, outfile)

            codes = [a.code for a in Appeal.objects.all()]
            for r in records:
                # Temporary filtering: skip these appeals, whose manually maintained versions should be kept:
                if r['APP_code'] in ['MDR65002', 'MDR00001', 'MDR00004']:
                    continue
                #if r['APP_code'] != 'MDRMZ014': # Debug to test bilateral additions or other specific appeals
                #    continue
                if r['APP_code'] not in codes:
                    new.append(r)
                # We use all records, do NOT check if last_modified > since_last_checked
                modified.append(r)

        return new, modified, bilaterals
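
    # A minimal sketch of a helper that could replace the two identical aggregation
    # loops marked "code duplication ¤" above; the method name `_build_bilaterals`
    # is hypothetical and not part of the original command.
    def _build_bilaterals(self, records):
        bilaterals = {}
        for r in records:
            if r['APP_Code'] and r['AmountCHF']:
                # Sum every bilateral contribution recorded for the same appeal code.
                bilaterals[r['APP_Code']] = bilaterals.get(r['APP_Code'], 0) + r['AmountCHF']
        return bilaterals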
Beispiel #19
0
def _epidemics_prefetch():
    query_params = json.dumps({
        'limit': 1000,
        'filter': {
            'operator': 'AND',
            'conditions': [
                {
                    'field': 'primary_type.code',
                    'value': ['EP'],
                },
            ]
        },
        'fields': {
            'include': ['name', 'date.created', 'primary_country.iso3']
        }
    })
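    # The query above asks ReliefWeb for disasters whose primary type code is 'EP'
    # (epidemics), up to 1000 records per request, returning only the fields needed
    # below; the loop that follows walks the paginated results via 'links' -> 'next'.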

    url = DISASTER_API
    data = {}
    while True:
        response = requests.post(url, data=query_params)
        if response.status_code != 200:
            body = { "name": "RELIEFWEB", "message": "Error querying ReliefWeb epicemics feed at " + url, "status": CronJobStatus.ERRONEOUS } # not every case is catched here, e.g. if the base URL is wrong...
            CronJob.sync_cron(body)
            return data
        response = response.json()
        for epidemic in response['data']:
            epidemic = epidemic['fields']
            iso3 = epidemic['primary_country']['iso3'].upper()
            pcountry = get_country_by_iso3(iso3)
            if pcountry is None:
                continue
            iso2 = pcountry.alpha_2
            dt = parse_date(epidemic['date']['created'])
            name = epidemic['name']
            selected_epidemic_type = None

            # Simple Text Search
            for epidemic_type, _ in PastEpidemic.CHOICES:
                if epidemic_type.lower() in name.lower():
                    selected_epidemic_type = epidemic_type
            if selected_epidemic_type is None:
                continue

            epidemic_data = {
                'epidemic': selected_epidemic_type,
                'year': dt.year,
                'month': dt.month,
            }

            if data.get(iso2) is None:
                data[iso2] = [epidemic_data]
            else:
                data[iso2].append(epidemic_data)

        if 'next' not in response['links']:
            break
        url = response['links']['next']
    body = { "name": "RELIEFWEB", "message": "Done querying all ReliefWeb feeds at " + url, "num_result": len(data), "status": CronJobStatus.SUCCESSFUL }
    CronJob.sync_cron(body)
    return data