Example #1
    def search(self, keyword: str):
        logger.info(f"Searching for: {keyword}")
        ret = []
        resp = self.get(self._search_api,
                        params={
                            "kw": keyword,
                            "per_page": 100,
                            "page": 1
                        })  # take the first 100 results
        if resp.status_code != 200 or resp.json()["code"] != 1:
            logger.warning(
                f"Response error: {resp.status_code} {self._search_api}")
            return ret

        data = resp.json()
        anime_meta_list = data.get("data").get("data") if data else []
        for meta in anime_meta_list:
            anime = AnimeMetaInfo()
            anime.title = meta["name"]
            anime.cover_url = meta["pic"]
            anime.category = meta["type"]
            anime.detail_page_url = str(meta["vid"])
            anime.desc = meta["label"]
            ret.append(anime)
        return ret
Example #2
    def get_detail(self, detail_page_url: str):
        url = self._base_url + detail_page_url
        logger.info(f"Parsing detail page: {url}")
        resp = self.get(url)
        if resp.status_code != 200:
            logger.warning(f"Response error: {resp.status_code} {url}")
            return AnimeDetailInfo()

        body = self.xpath(resp.text, '//div[@class="fire l"]')[0]
        anime_detail = AnimeDetailInfo()
        anime_detail.title = body.xpath("./div/h1/text()")[0]
        anime_detail.category = " ".join(
            body.xpath('.//div[@class="sinfo"]/span[3]/a/text()'))
        anime_detail.desc = body.xpath(
            './/div[@class="info"]/text()')[0].replace("\r\n", "").strip()
        anime_detail.cover_url = body.xpath(
            './/div[@class="thumb l"]/img/@src')[0]
        vc = VideoCollection()
        vc.name = "播放列表"  # "Playlist"
        video_blocks = body.xpath('.//div[@class="movurl"]//li')
        for block in video_blocks:
            video = Video()
            video.name = block.xpath("./a/text()")[0]
            video.raw_url = block.xpath("./a/@href")[0]  # '/v/3849-162.html'
            video.handler = "YHDMVideoHandler"
            vc.append(video)
        anime_detail.append(vc)
        return anime_detail
Example #3
 def disable_danmaku(self, danmaku: str) -> bool:
     """Disable a particular danmaku (bullet-comment) engine."""
     if danmaku in self.get_all_danmaku():
         logger.warning(f"Danmaku {danmaku} disabled")
         self._dict["danmaku"][danmaku] = False
         self._save()
         return True
     return False
Example #4
File: run.py Project: popmedd/Mario
def clean_db():
    if request.method == 'GET':
        result = show_db()
        return result
    if request.method == 'POST':
        logger.warning("{} requested a database cleanup, waiting for the server to process it".format(request.remote_addr))
        clean_status['clean_db'] = "waiting process"
        return "start clean db"
Example #5
 def disable_engine(self, engine: str) -> bool:
     """Disable a particular engine."""
     if engine in self.get_all_engines():
         logger.warning(f"Engine {engine} disabled")
         self._dict["engines"][engine] = False
         self._save()
         return True
     return False
Example #6
 def get_video_data(hash_key: str):
     """Fetch the video data stream via the API proxy."""
     video = self._anime_db.fetch(hash_key)
     if not video:
         return "URL Invalid"
     if not video.real_url:
         logger.warning("Real url not resolved yet")
         real_url = self._engine_mgr.get_video_url(video)
         video.real_url = real_url
         self._anime_db.update(hash_key, video)
     return self._engine_mgr.make_response_for(video)
Example #7
File: run.py Project: popmedd/Mario
def del_client_rules():
    if request.method == 'POST':
        del_info = request.get_data().decode('utf-8')
        del_id = json.loads(del_info)['id']
        del_result = api.web.del_rules(del_id)
        logger.warning("Rule {} deleted".format(del_id))
        return del_result
    if request.method == 'DELETE':
        del_result = api.web.del_rules("all")
        logger.warning("Defense policy reset")
        return del_result
Example #8
 def head(url: str, params=None, allow_redirects=True, **kwargs) -> requests.Response:
     """Wrapper around HEAD; 302 redirects are followed by default, used to resolve the target's direct link."""
     try:
         logger.debug(f"url: {url}, params: {params}, allow_redirects: {allow_redirects}")
         kwargs.setdefault("timeout", 10)
         kwargs.setdefault("headers", HtmlParseHelper._headers)
         return requests.head(url, params=params, verify=False, allow_redirects=allow_redirects, **kwargs)
     except requests.Timeout as e:
         logger.warning(e)
         return requests.Response()
     except requests.RequestException:
         return requests.Response()
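
A minimal usage sketch for the helper above; the target URL is a placeholder, and the static-style call on HtmlParseHelper is an assumption (the other examples reach it via self.head):

# Illustrative only: follow the redirects of a short link and read the final URL.
resp = HtmlParseHelper.head("http://example.com/some-redirecting-link")
if resp.url:
    print("Resolved direct link:", resp.url)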
Example #9
 def get_city(ip, language="en", location=False):
     try:
         response_data = GeoipDatabase.city(ip)
     except geoip2.errors.AddressNotFoundError:
         original_ip = ip
         try:
             response_data = GeoipDatabase.city(config['client_ip'])
             logger.warning("Remote client's internal network is under attack")
         except:
             response_data = GeoipDatabase.city(config['ip'][0])
             logger.warning("Local client's internal network is under attack")
     if location:
         try:
             ip_info = {}
             try:
                 ip_info["ip"] = original_ip
             except:
                 ip_info["ip"] = ip
             ip_info["country"] = response_data.country.names[language]
             ip_info["city"] = response_data.city.names[language]
             ip_info["longitude"] = response_data.location.longitude
             ip_info["latitude"] = response_data.location.latitude
             return ip_info
         except KeyError:
             ip_info = {}
             try:
                 ip_info["ip"] = original_ip
             except:
                 ip_info["ip"] = ip
             ip_info["country"] = response_data.country.names[language]
             ip_info["city"] = response_data.city.name
             if ip_info["city"] == None:
                 ip_info["city"] = ip_info["country"]
             ip_info["longitude"] = response_data.location.longitude
             ip_info["latitude"] = response_data.location.latitude
             return ip_info
     else:
         try:
             ip_info = {}
             try:
                 ip_info["ip"] = original_ip
             except:
                 ip_info["ip"] = ip
             ip_info["country"] = response_data.country.names[language]
             ip_info["city"] = response_data.city.names[language]
             return ip_info
         except KeyError:
             ip_info = {}
             ip_info["country"] = response_data.country.names[language]
             ip_info["city"] = response_data.city.name
             return ip_info
Example #10
 def post(url: str, data=None, html_encoding="utf-8", **kwargs) -> requests.Response:
     """Wrapper around POST; the page encoding defaults to utf-8."""
     try:
         logger.debug(f"url: {url}, data: {data}")
         kwargs.setdefault("timeout", 5)
         kwargs.setdefault("headers", HtmlParseHelper._headers)
         ret = requests.post(url, data, verify=False, **kwargs)
         ret.encoding = html_encoding
         return ret
     except requests.Timeout as e:
         logger.warning(e)
         return requests.Response()
     except requests.RequestException:
         return requests.Response()
Example #11
def clean_mongo():
    clean_status['last_clean'] = int(time.time())
    src_iplist = []
    myclient = pymongo.MongoClient(config['mongo_url'], connect=False)
    mydb = myclient["mariodb"]
    alert_info = mydb['alert']
    coll_names = mydb.list_collection_names(session=None)
    for alert in alert_info.find():
        src_iplist.append(str(alert['src_ip']))
    for coll in coll_names:
        db = mydb[coll]
        del_id = []
        del_count = 0
        if coll == "fileinfo":
            del_count += db.find({
                "fileinfo.filename": {
                    "$regex": "eve_.*json"
                }
            }).count()
            db.delete_many({"fileinfo.filename": {"$regex": "eve_.*json"}})
            del_count += db.find({
                "fileinfo.filename": {
                    "$regex": "/api/"
                }
            }).count()
            db.delete_many({"fileinfo.filename": {"$regex": "/api/"}})
            del_count += db.find({
                "fileinfo.filename": {
                    "$regex": "local.rules"
                }
            }).count()
            db.delete_many({"fileinfo.filename": {"$regex": "local.rules"}})
        if coll in ['alert', 'flow', 'stats']:
            continue
        logger.info("Cleaning collection {}".format(coll))
        for item in list(db.find().batch_size(500)[:]):
            try:
                if item['src_ip'] not in src_iplist:
                    del_id.append(item['_id'])
                else:
                    continue
            except:
                del_id.append(item['_id'])
                continue
        for data_id in del_id:
            query = {"_id": data_id}
            delresult = db.delete_one(query)
            del_count += delresult.deleted_count
        logger.warning("Cleaned {} records".format(del_count))
    myclient.close()
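
Note: the cursor.count() calls above were deprecated and later removed from PyMongo; with the current API the same count is done with count_documents on the collection. A self-contained sketch of the equivalent call (the function name and connection handling are illustrative, not part of the original project):

from pymongo import MongoClient

def count_eve_files(mongo_url: str) -> int:
    # Same filter as above, expressed with the PyMongo 4.x API.
    fileinfo = MongoClient(mongo_url)["mariodb"]["fileinfo"]
    return fileinfo.count_documents({"fileinfo.filename": {"$regex": "eve_.*json"}})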
Example #12
 def get(url: str, params=None, html_encoding="utf-8", **kwargs) -> requests.Response:
     """Wrapper around GET; the page encoding defaults to utf-8."""
     try:
         logger.debug(f"url: {url}, params: {params}")
         kwargs.setdefault("timeout", 5)
         kwargs.setdefault("headers", HtmlParseHelper._headers)
         ret = requests.get(url, params, verify=False, **kwargs)
         ret.encoding = html_encoding  # some pages still use encodings such as gb2312/gb18030, which need to be set explicitly
         return ret
     except requests.Timeout as e:
         logger.warning(e)
         return requests.Response()
     except requests.RequestException:
         return requests.Response()
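
A short usage sketch of the wrapper above, showing the html_encoding override mentioned in the comment; the URL is a placeholder and the static-style call on HtmlParseHelper is an assumption:

# Illustrative only: fetch a legacy page that still serves GBK-family content.
resp = HtmlParseHelper.get("http://example.com/legacy-page", html_encoding="gb18030")
print(resp.text[:200])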
Example #13
    def search(self, keyword: str):
        logger.info(f"Searching for: {keyword}")
        resp = self.post(self._search_api, data={"userid": "", "key": keyword})
        if resp.status_code != 200:
            logger.warning(f"Response error: {resp.status_code} {self._search_api}")
            return []

        anime_meta_list = resp.json().get("data")
        ret = []
        for meta in anime_meta_list:
            anime = AnimeMetaInfo()
            anime.title = meta["videoName"]
            anime.cover_url = meta["videoImg"]
            anime.category = meta["videoClass"]
            anime.detail_page_url = meta["videoId"]
            ret.append(anime)
        return ret
Example #14
def get_dbfile():
    ftphost = settings.GO_FTPHOST
    ftpuser = settings.GO_FTPUSER
    ftppass = settings.GO_FTPPASS
    dbpass = settings.GO_DBPASS
    if ftphost is None or ftpuser is None or ftppass is None:
        if os.path.exists('URLs.mdb'):
            logger.info('No credentials in env, using local MDB database file')
            logger.warning(
                'If this occurs outside development, contact an administrator')
            return 'URLs.mdb'
        else:
            raise Exception(
                'FTP credentials not provided (GO_FTPHOST, GO_FTPUSER, GO_FTPPASS)'
            )
    if dbpass is None:
        raise Exception(
            'Database encryption password not provided (GO_DBPASS)')
    logger.info('Attempting connection to FTP')
    ftp = FTP(ftphost)
    ftp.login(user=ftpuser, passwd=ftppass)
    ftp.cwd('/dmis/')
    data = []
    ftp.dir('-t', data.append)
    filename = data[-1].split()[3]

    # check if we already have this file
    files = glob('URLs*zip')
    if filename in files and os.path.exists('URLs.mdb'):
        ftp.quit()
        return 'URLs.mdb'

    # clean up old files
    for f in files:
        os.remove(f)

    logger.info('Fetching %s' % filename)
    with open(filename, 'wb') as f:
        ftp.retrbinary('RETR ' + filename, f.write, 2014)
    ftp.quit()

    logger.info('Unzipping database file')
    zp = ZipFile(filename)
    zp.extractall('./', pwd=dbpass.encode('cp850', 'replace'))
    return 'URLs.mdb'
Example #15
 def get_real_url(self):
     """Get the video URL from the video's play_id."""
     play_api = "http://service-agbhuggw-1259251677.gz.apigw.tencentcs.com/android/video/newplay"
     play_id = self.get_raw_url()
     secret_key = "zandroidzz"
     now = int(time.time() * 1000)  # 13-digit millisecond timestamp
     sing = secret_key + str(now)
     sing = md5(sing.encode("utf-8")).hexdigest()
     logger.info(f"Parsing real url for {play_id}")
     payload = {"playid": play_id, "userid": "", "apptoken": "", "sing": sing, "map": now}
     resp = self.post(play_api, data=payload)
     if resp.status_code != 200:
         logger.warning(f"Response error: {resp.status_code} {play_api}")
         logger.debug(f"POST params: {payload}")
         return "error"
     real_url = resp.json()["data"]["videoplayurl"]
     logger.info(f"Video real url: {real_url}")
     return real_url
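
The request signature above is just an MD5 digest of the secret key concatenated with a millisecond timestamp. A standalone sketch of that step (the key and the payload field names come from the example; the helper name is illustrative):

import time
from hashlib import md5

def make_sign(secret_key: str = "zandroidzz"):
    now = int(time.time() * 1000)  # 13-digit millisecond timestamp
    sign = md5((secret_key + str(now)).encode("utf-8")).hexdigest()
    return sign, now  # fills the "sing" and "map" payload fields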
Example #16
 def get_detail(self, detail_page_url: str):
     resp = self.get(self._detail_api, params={"userid": "", "videoId": detail_page_url})
     if resp.status_code != 200:
         logger.warning(f"Response error: {resp.status_code} {self._detail_api}")
         return AnimeDetailInfo()
     detail = resp.json().get("data")  # video detail info
     anime_detail = AnimeDetailInfo()
     anime_detail.title = detail["videoName"]
     anime_detail.cover_url = detail["videoImg"]
     anime_detail.desc = detail["videoDoc"].replace("\r\n", "")  # full description
     anime_detail.category = detail["videoClass"]
     for play_list in detail["videoSets"]:
         vc = VideoCollection()  # the anime's video list
         vc.name = play_list["load"]  # list name, e.g. Line I, Line II
         for video in play_list["list"]:
             vc.append(Video(video["ji"], video["playid"], "ZZFunVideoHandler"))
         anime_detail.append(vc)
     return anime_detail
Example #17
    def get_real_url(self) -> str:
        url = "http://www.yhdm.tv/" + self.get_raw_url()
        logger.info(f"Parsing real url for {url}")
        resp = self.get(url)
        if resp.status_code != 200:
            logger.warning(f"Response error: {resp.status_code} {url}")
            return "error"
        video_url = self.xpath(
            resp.text, '//div[@id="playbox"]/@data-vid')[0]  # "url$format"
        video_url = video_url.split(
            "$"
        )[0]  # "http://quan.qq.com/video/1098_ae4be38407bf9d8227748e145a8f97a5"
        if not video_url.startswith("http"):  # occasionally a video is invalid
            logger.warning(f"This video is not valid: {video_url}")
            return "error"

        logger.debug(f"Redirect for {video_url}")
        resp = self.head(video_url, allow_redirects=True)  # resolving the direct link involves 2 redirects
        logger.info(f"Video real url: {resp.url}")
        return resp.url  # the direct video link after the redirects
Example #18
    def parse_one_page(self, keyword: str, page: int):
        logger.info(f"Searching for {keyword}, page {page}")
        resp = self.get(self._search_api + "/" + keyword,
                        params={"page": page})
        if resp.status_code != 200:
            logger.warning(
                f"Response error: {resp.status_code} {self._search_api}")
            return [], ""

        anime_meta_list = self.xpath(resp.text, '//div[@class="lpic"]//li')
        ret = []
        for meta in anime_meta_list:
            anime = AnimeMetaInfo()
            anime.title = " ".join(meta.xpath(".//h2/a/@title"))
            anime.cover_url = meta.xpath("./a/img/@src")[0]
            anime.category = " ".join(meta.xpath("./span[2]/a/text()"))
            anime.desc = meta.xpath("./p/text()")[0]
            anime.detail_page_url = meta.xpath("./a/@href")[
                0]  # /show/5031.html
            ret.append(anime)
        return ret, resp.text
Example #19
    def get_detail(self, detail_page_url: str):
        resp = self.get(self._detail_api, params={"vid": detail_page_url})
        if resp.status_code != 200 or resp.json()["code"] != 1:
            logger.warning(
                f"Response error: {resp.status_code} {self._detail_api}")
            return AnimeDetailInfo()

        detail = resp.json().get("data")  # video detail info
        anime_detail = AnimeDetailInfo()
        anime_detail.title = detail["name"]
        anime_detail.cover_url = detail["pic"]
        anime_detail.desc = detail["label"]
        anime_detail.category = detail["type"]

        vc = VideoCollection()
        vc.name = "视频列表"  # "Video list"
        video_set = dict(detail["playUrl"])
        for name, url in video_set.items():
            vc.append(Video(name, url))
        anime_detail.append(vc)
        return anime_detail
Example #20
def get_go_event(tags):
    '''
        Returns a GO Event object by looking for a tag like `OP-<event_id>`,
        or None if there is no valid OP- tag on the Position.
    '''
    event = None
    for tag in tags:
        if tag['name'].startswith('OP-'):
            event_id = tag['name'].replace('OP-', '').strip()
            try:
                event_id_int = int(event_id)
            except:
                logger.warning('%s tag is not a valid OP- tag' % event_id)
                continue
            try:
                event = Event.objects.get(id=event_id_int)
            except:
                logger.warning('Emergency with ID %d not found' % event_id_int)
                continue
            return event
    return event
Example #21
def delete_es_index(instance):
    ''' instance needs an es_id() '''

    if ES_CLIENT and ES_PAGE_NAME:
        # To make sure it doesn't run for tests
        if hasattr(instance, 'es_id'):
            try:
                deleted, errors = bulk(client=ES_CLIENT,
                                       actions=[{
                                           '_op_type': 'delete',
                                           '_index': ES_PAGE_NAME,
                                           '_type': 'page',
                                           '_id': instance.es_id()
                                       }])
                logger.info(f'Deleted {deleted} records')
                log_errors(errors)
            except Exception:
                logger.error(
                    'Could not reach Elasticsearch server or index was already missing.'
                )
        else:
            logger.warning('instance does not have an es_id() method')
Example #22
    def parse_country(self, gec_code, country_name):
        # If gec_code has a mapping then we use that Country straight
        gec = GEC_CODES.filter(code=gec_code).first()
        if gec:
            return gec.country

        # Otherwise gec_code must be an ISO code, but we're using country_name as a backup check
        if len(gec_code) == 2:
            # Filter for 'Country' types only
            country = Country.objects.filter(iso__iexact=gec_code,
                                             record_type=1).first()

            if country is None:
                country = Country.objects.filter(
                    name__iexact=country_name).first()
        else:
            country = Country.objects.filter(name__iexact=country_name).first()

        if not country:
            logger.warning(
                f'Could not find Country with: {gec_code} OR {country_name}')

        return country
Example #23
def show_ioc():
    try:
        config['mongo_url']
    except:
        start()
    myclient = pymongo.MongoClient(config['mongo_url'], connect=False)
    try:
        mydb = myclient["azkaban"]
    except:
        logger.warning("No IOC plugin")
        return "no ioc plug"
    coll_names = mydb.list_collection_names(session=None)
    db_info = {}
    db_info['data'] = []
    db_info['sum'] = 0
    for coll in coll_names:
        db = mydb[coll]
        info = {}
        info['name'] = coll
        info['count'] = db.find().count()
        db_info['sum'] += info['count']
        db_info['data'].append(info)
    return json.dumps(db_info)
Example #24
def change_setting(settings):
    with open('./ThirPath/marioips/bin/senteve.sh', 'r') as script_senteve:
        old_base_settings = script_senteve.read()
        max_logfile_num = re.findall(r'-ge (.*?) ]', old_base_settings,
                                     re.S)[0]
        heartbeat_time = re.findall(r'sleep (.*?);', old_base_settings,
                                    re.S)[0]
        new_base_settings = old_base_settings.replace(
            max_logfile_num,
            settings['max_logfile_num']).replace(heartbeat_time,
                                                 settings['heartbeat_time'])
    with open('./ThirPath/marioips/bin/senteve.sh', 'w') as script_senteve:
        script_senteve.write(new_base_settings)
    # with open('./ThirPath/marioips/marioips.yaml','r') as marioips_yaml:
    #     old_mario_setting = marioips_yaml.read()
    #     save_pcap = re.findall(r'pcap-log:.+?enabled: (.*?) #setting save_pcap',old_mario_setting,re.DOTALL)[0]
    #     pcap_size = re.findall(r'\slimit: (.*?b)',old_mario_setting)[0]
    #     save_file = re.findall(r'file-store:.+?enabled: (.*?) #setting save_file',old_mario_setting,re.DOTALL)[0]
    #     new_mario_settings = old_mario_setting.replace(save_pcap,settings['save_pcap']).replace(pcap_size,settings['pcap_size']).replace(save_file,settings['save_file'])
    # with open('./ThirPath/marioips/marioips.yaml','w') as marioips_yaml:
    #     marioips_yaml.write(new_mario_settings)
    logger.warning("Configuration file updated: {}".format(settings))
    config['update_setting_time'] = int(time.time())
    return "修改成功"  # "update successful"
Example #25
    def detect_video_format(self) -> str:
        """Determine the video's real format; the URL may not carry a file extension."""
        # Try to extract the extension from the URL
        url = self._get_real_url()
        try:
            ext = url.split("?")[0].split(".")[-1].lower()
            if ext in ["mp4", "flv"]:
                return ext
            if ext == "m3u8":
                return "hls"
        except (IndexError, AttributeError):
            pass

        # The video metadata carries format information; look for hex signatures
        # near the start of the stream to infer the container format.
        format_hex = {
            "mp4": ["69736F6D", "70617663", "6D703432", "4D50454734", "4C617666"],  # "isom", "pavc", "mp42", "MPEG4", "Lavf"
            "flv": ["464C56"],  # "FLV"
            "hls": ["4558544D3355"]  # "EXTM3U"
        }

        _, data_iter = self._get_stream_from_server(0, 512)
        if not data_iter:
            logger.warning("Could not get video stream from server")
            return "unknown"

        logger.debug("Detecting video format from binary stream")
        video_meta = next(data_iter).hex().upper()
        for format_, hex_list in format_hex.items():
            for hex_sign in hex_list:
                if hex_sign in video_meta:
                    logger.debug(f"Video format: {format_}")
                    return format_
        logger.error("Could not detect video format from stream")
        logger.debug("Video raw binary stream (512byte):")
        logger.debug(video_meta)
        return "unknown"
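
For illustration, the same signature check can run against a local file instead of a remote stream. A self-contained sketch reusing the hex table from the method above (the function name and the file handling are assumptions, not part of the original class):

def sniff_container_format(path: str) -> str:
    """Read the first 512 bytes of a file and match known container signatures."""
    format_hex = {
        "mp4": ["69736F6D", "70617663", "6D703432", "4D50454734", "4C617666"],  # "isom", "pavc", "mp42", "MPEG4", "Lavf"
        "flv": ["464C56"],  # "FLV"
        "hls": ["4558544D3355"],  # "EXTM3U"
    }
    with open(path, "rb") as f:
        head = f.read(512).hex().upper()
    for fmt, signatures in format_hex.items():
        if any(sig in head for sig in signatures):
            return fmt
    return "unknown"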
Example #26
    def handle(self, *args, **options):
        # get latest
        filename = get_dbfile()

        # numeric details records
        details_rc = extract_table(filename, 'EW_Report_NumericDetails')
        # check for 1 record for each field report
        fids = [r['ReportID'] for r in details_rc]
        if len(set(fids)) != len(fids):
            raise Exception(
                'More than one NumericDetails record for a field report')
        # numeric details records
        details_gov = extract_table(filename, 'EW_Report_NumericDetails_GOV')
        # check for 1 record for each field report
        fids = [r['ReportID'] for r in details_gov]
        if len(set(fids)) != len(fids):
            raise Exception(
                'More than one NumericDetails record for a field report')

        # information
        info_table = extract_table(filename, 'EW_Report_InformationManagement')
        fids = [r['ReportID'] for r in info_table]
        if len(set(fids)) != len(fids):
            raise Exception(
                'More than one InformationManagement record for a field report'
            )

        # ## many-to-many

        # actions taken
        actions_national = extract_table(filename,
                                         'EW_Report_ActionTakenByRedCross')
        actions_foreign = extract_table(filename,
                                        'EW_Report_ActionTakenByPnsRC')
        actions_federation = extract_table(
            filename, 'EW_Report_ActionTakenByFederationRC')

        # source types
        source_types = extract_table(filename, 'EW_lofSources')
        for s in source_types:
            SourceType.objects.get_or_create(
                pk=s['SourceID'], defaults={'name': s['SourceName']})

        source_table = extract_table(filename, 'EW_Reports_Sources')

        # disaster response
        dr_table = extract_table(filename, 'EW_DisasterResponseTools')
        # check for 1 record for each field report
        fids = [r['ReportID'] for r in dr_table]
        if len(set(fids)) != len(fids):
            raise Exception(
                'More than one DisasterResponseTools record for a field report'
            )

        # contacts
        contacts = extract_table(filename, 'EW_Report_Contacts')

        # field report
        reports = extract_table(filename, 'EW_Reports')
        rids = [r.rid for r in FieldReport.objects.all()]
        num_reports_created = 0
        logger.info('%s reports in database' % len(reports))
        for i, report in enumerate(reports):

            # Skip reports that we've already ingested.
            # We don't have to update them because field reports can't be updated in DMIS.
            rid = report['ReportID']
            if rid in rids:
                continue

            report_name = report['Summary']
            report_description = report['BriefSummary']
            report_dtype = DisasterType.objects.get(
                pk=PK_MAP[report['DisasterTypeID']])
            record = {
                'rid':
                rid,
                'summary':
                report_name,
                'description':
                report_description,
                'dtype':
                report_dtype,
                'status':
                report['StatusID'],
                'request_assistance':
                report['GovRequestsInternAssistance'],
                'actions_others':
                report['ActionTakenByOthers'],
                'report_date':
                datetime.strptime(report['Inserted'],
                                  REPORT_DATE_FORMAT).replace(tzinfo=pytz.utc),
            }
            details = fetch_relation(details_rc, report['ReportID'])
            assert (len(details) <= 1)
            if len(details) > 0:
                details = details[0]
                record.update({
                    'num_injured':
                    details['NumberOfInjured'],
                    'num_dead':
                    details['NumberOfCasualties'],
                    'num_missing':
                    details['NumberOfMissing'],
                    'num_affected':
                    details['NumberOfAffected'],
                    'num_displaced':
                    details['NumberOfDisplaced'],
                    'num_assisted':
                    details['NumberOfAssistedByRC'],
                    'num_localstaff':
                    details['NumberOfLocalStaffInvolved'],
                    'num_volunteers':
                    details['NumberOfVolunteersInvolved'],
                    'num_expats_delegates':
                    details['NumberOfExpatsDelegates']
                })
            details = fetch_relation(details_gov, report['ReportID'])
            assert (len(details) <= 1)
            if len(details) > 0:
                details = details[0]
                record.update({
                    'gov_num_injured':
                    details['NumberOfInjured_GOV'],
                    'gov_num_dead':
                    details['NumberOfDead_GOV'],
                    'gov_num_missing':
                    details['NumberOfMissing_GOV'],
                    'gov_num_affected':
                    details['NumberOfAffected_GOV'],
                    'gov_num_displaced':
                    details['NumberOfDisplaced_GOV'],
                    'gov_num_assisted':
                    details['NumberOfAssistedByGov_GOV']
                })
            info = fetch_relation(info_table, report['ReportID'])
            if len(info) > 0:
                info = {k: '' if v is None else v for k, v in info[0].items()}
                record.update({
                    'bulletin': {
                        '': 0,
                        'None': 0,
                        'Planned': 2,
                        'Published': 3
                    }[info['InformationBulletin']],
                    'dref': {
                        '': 0,
                        'No': 0,
                        'Planned': 2,
                        'Yes': 3
                    }[info['DREFRequested']],
                    'dref_amount':
                    0 if info['DREFRequestedAmount'] == '' else float(
                        info['DREFRequestedAmount']),
                    'appeal': {
                        '': 0,
                        'Planned': 2,
                        'Yes': 3,
                        'NB': 0,
                        'No': 0,
                        'YES': 3
                    }[info['EmergencyAppeal']],
                    'appeal_amount':
                    0 if info['EmergencyAppealAmount'] == '' else float(
                        info['EmergencyAppealAmount']),
                })
            # disaster response
            response = fetch_relation(dr_table, report['ReportID'])

            if len(response) > 0:
                response = {
                    k: '' if v is None else v
                    for k, v in response[0].items()
                }
                record.update({
                    'rdrt': {
                        '': 0,
                        'No': 0,
                        'Yes': 3,
                        'Planned/Requested': 2
                    }[response['RDRT']],
                    'fact': {
                        '': 0,
                        'No': 0,
                        'Yes': 3,
                        'Planned/Requested': 2
                    }[response['FACT']],
                    'eru_relief': {
                        '': 0,
                        'Yes': 3,
                        'Planned/Requested': 2,
                        'No': 0
                    }[response['ERU']]
                })

            field_report = FieldReport(**record)

            # Create an associated event object
            event_record = {
                'name':
                report_name if len(report_name) else report_dtype.name,
                'summary':
                report_description,
                'dtype':
                report_dtype,
                'disaster_start_date':
                datetime.utcnow().replace(tzinfo=timezone.utc),
                'auto_generated':
                True,
                'auto_generated_source':
                SOURCES['report_ingest'],
            }
            event = Event(**event_record)
            event.save()

            field_report.event = event
            field_report.save()
            num_reports_created = num_reports_created + 1

            try:
                country = Country.objects.select_related().get(
                    pk=report['CountryID'])
            except ObjectDoesNotExist:
                logger.warning('Could not find a matching country for %s' %
                               report['CountryID'])
                country = None

            if country is not None:
                field_report.countries.add(country)
                event.countries.add(country)
                if country.region is not None:
                    # No need to add a field report region, as that happens through a trigger.
                    field_report.regions.add(country.region)
                    event.regions.add(country.region)

            # ## add items with foreignkeys to report
            # national red cross actions
            actions = fetch_relation(actions_national, report['ReportID'])
            if len(actions) > 0:
                txt = ' '.join(
                    [a['Value'] for a in actions if a['Value'] is not None])
                act = ActionsTaken(organization='NTLS',
                                   summary=txt,
                                   field_report=field_report)
                act.save()
                for pk in [a['ActionTakenByRedCrossID'] for a in actions]:
                    act.actions.add(*Action.objects.filter(pk=pk))

            # foreign red cross actions
            actions = fetch_relation(actions_foreign, report['ReportID'])
            if len(actions) > 0:
                txt = ' '.join(
                    [a['Value'] for a in actions if a['Value'] is not None])
                act = ActionsTaken(organization='PNS',
                                   summary=txt,
                                   field_report=field_report)
                act.save()
                for pk in [a['ActionTakenByRedCrossID'] for a in actions]:
                    act.actions.add(*Action.objects.filter(pk=pk))

            # federation red cross actions
            actions = fetch_relation(actions_federation, report['ReportID'])
            if len(actions) > 0:
                txt = ' '.join(
                    [a['Value'] for a in actions if a['Value'] is not None])
                act = ActionsTaken(organization='FDRN',
                                   summary=txt,
                                   field_report=field_report)
                act.save()
                for pk in [a['ActionTakenByRedCrossID'] for a in actions]:
                    act.actions.add(*Action.objects.filter(pk=pk))

            # sources
            sources = fetch_relation(source_table, report['ReportID'])
            for s in sources:
                spec = '' if s['Specification'] is None else s['Specification']
                Source.objects.create(
                    stype=SourceType.objects.get(pk=s['SourceID']),
                    spec=spec,
                    field_report=field_report)

            # disaster response
            response = fetch_relation(dr_table, report['ReportID'])

            # contacts
            contact = fetch_relation(contacts, report['ReportID'])
            if len(contact) > 0:
                # make sure just one contacts record
                assert (len(contact) == 1)
                contact = contact[0]
                fields = [
                    'Originator', 'Primary', 'Federation', 'NationalSociety',
                    'MediaNationalSociety', 'Media'
                ]
                for f in fields:
                    if contact_is_valid(contact, f):
                        FieldReportContact.objects.create(
                            ctype=f,
                            name=contact['%sName' % f],
                            title=contact['%sFunction' % f],
                            email=contact['%sContact' % f],
                            field_report=field_report,
                        )
        total_reports = FieldReport.objects.all()
        logger.info('%s reports created' % num_reports_created)
        logger.info('%s reports in database' % total_reports.count())

        # org type mapping
        org_types = {
            '1': 'NTLS',
            '2': 'DLGN',
            '3': 'SCRT',
            '4': 'ICRC',
        }
        last_login_threshold = timezone.now() - timedelta(days=365)

        # add users
        user_records = extract_table(filename, 'DMISUsers')
        processed_users = 0
        for i, user_data in enumerate(user_records):
            if user_data['LoginLastSuccess'] == '':
                continue

            last_login = datetime.strptime(
                user_data['LoginLastSuccess'],
                REPORT_DATE_FORMAT,
            )
            last_login = pytz.UTC.localize(last_login)

            # skip users who haven't logged in for a year
            if last_login < last_login_threshold:
                continue

            try:
                user = User.objects.get(username=user_data['UserName'])
            except ObjectDoesNotExist:
                user = None

            if user is None:
                name = user_data['RealName'].split()
                first_name = name[0]
                last_name = ' '.join(name[1:]) if len(name) > 1 else ''
                user = User.objects.create(
                    username=user_data['UserName'],
                    first_name=first_name if len(first_name) <= 30 else '',
                    last_name=last_name if len(last_name) <= 30 else '',
                    email=user_data['EmailAddress'],
                    last_login=last_login,
                )
                user.set_password(user_data['Password'])
                user.is_staff = True if user_data[
                    'UserIsSysAdm'] == '1' else False

            # set user profile info
            user.profile.org = user_data['OrgTypeSpec'] if len(
                user_data['OrgTypeSpec']) <= 100 else ''
            user.profile.org_type = org_types.get(user_data['OrgTypeID'])
            # print(i, user_data['CountryID']) # - for debug purposes. Check go-api/data/Countries.csv for details.
            if user_data['CountryID'] in ['275', '281']:
                user_data[
                    'CountryID'] = '47'  # Hong Kong or Taiwan should be handled as China. Macao (279) is a separate case.
            elif user_data['CountryID'] in ['284']:
                user_data[
                    'CountryID'] = '292'  # Zone Central and West Africa -> Central Africa Country Cluster
            user.profile.country = Country.objects.get(
                pk=user_data['CountryID'])
            user.profile.city = user_data['City'] if len(
                user_data['City']) <= 100 else ''
            user.profile.department = user_data['Department'] if len(
                user_data['Department']) <= 100 else ''
            user.profile.position = user_data['Position'] if len(
                user_data['Position']) <= 100 else ''
            user.profile.phone_number = user_data['PhoneNumberProf'] if len(
                user_data['PhoneNumberProf']) <= 100 else ''
            user.save()
            processed_users = processed_users + 1
        logger.info('%s updated active user records' % processed_users)
Example #27
def sync_open_positions(molnix_positions, molnix_api, countries):
    molnix_ids = [p['id'] for p in molnix_positions]
    warnings = []
    messages = []
    successful_creates = 0
    successful_updates = 0

    for position in molnix_positions:
        event = get_go_event(position['tags'])
        country = get_go_country(countries, position['country_id'])
        if not country:
            warning = 'Position id %d does not have a valid Country' % (
                position['id'])
            logger.warning(warning)
            warnings.append(warning)
            continue
        # If no valid GO Emergency tag is found, skip Position
        if not event:
            warning = 'Position id %d does not have a valid Emergency tag.' % position[
                'id']
            logger.warning(warning)
            warnings.append(warning)
            continue
        go_alert, created = SurgeAlert.objects.get_or_create(
            molnix_id=position['id'])
        # We set all Alerts coming from Molnix to RR / Alert
        go_alert.atype = SurgeAlertType.RAPID_RESPONSE
        go_alert.category = SurgeAlertCategory.ALERT
        # print(json.dumps(position, indent=2))
        go_alert.molnix_id = position['id']
        go_alert.message = position['name']
        go_alert.molnix_status = position['status']
        go_alert.event = event
        go_alert.country = country
        go_alert.opens = get_datetime(position['opens'])
        go_alert.closes = get_datetime(position['closes'])
        go_alert.start = get_datetime(position['start'])
        go_alert.end = get_datetime(position['end'])
        go_alert.is_active = True
        go_alert.save()
        add_tags_to_obj(go_alert, position['tags'])
        if created:
            successful_creates += 1
        else:
            successful_updates += 1

    # Find existing active alerts that are not in the current list from Molnix
    existing_alerts = SurgeAlert.objects.filter(is_active=True).exclude(
        molnix_id__isnull=True)
    existing_alert_ids = [e.molnix_id for e in existing_alerts]
    inactive_alerts = list(set(existing_alert_ids) - set(molnix_ids))

    # Mark alerts that are no longer in Molnix as inactive
    for alert in SurgeAlert.objects.filter(molnix_id__in=inactive_alerts):
        # We need to check the position ID in Molnix
        # If the status is "unfilled", we don't mark the position as inactive,
        # just set status to unfilled
        position = molnix_api.get_position(alert.molnix_id)
        if not position:
            warnings.append('Position id %d not found in Molnix API' %
                            alert.molnix_id)
        if position and position['status'] == 'unfilled':
            alert.molnix_status = position['status']
        else:
            alert.is_active = False
        alert.save()

    marked_inactive = len(inactive_alerts)
    messages = [
        'Successfully created: %d' % successful_creates,
        'Successfully updated: %d' % successful_updates,
        'Marked inactive: %d' % marked_inactive,
        'No of Warnings: %d' % len(warnings)
    ]
    return messages, warnings, successful_creates
Example #28
    def handle(self, *args, **options):
        logger.info('Starting appeal document ingest')

        # v smoke test
        baseurl = 'https://www.ifrc.org/appeals/'  # no more ...en/publications-and-reports...
        http = PoolManager(
        )  # stackoverflow.com/questions/36516183/what-should-i-use-to-open-a-url-instead-of-urlopen-in-urllib3
        smoke_response = http.request('GET', baseurl)
        joy_to_the_world = False
        if smoke_response.status == 200:
            joy_to_the_world = True  # We log the success later, when we know the numeric results.
        else:
            body = {
                "name": "ingest_appeal_docs",
                "message":
                f'Error ingesting appeals_docs on url: {baseurl}, error_code: {smoke_response.status}',
                "status": CronJobStatus.ERRONEOUS
            }
            CronJob.sync_cron(body)
        # ^ smoke test

        if options['fullscan']:
            # If the `--fullscan` option is passed (at the end of the command), check ALL appeals. This takes about an hour!
            print('Doing a full scan of all Appeals')
            qset = Appeal.objects.all()
        else:
            # By default, only check appeals for the past 3 months where Appeal Documents is 0
            now = datetime.now().replace(tzinfo=timezone.utc)
            six_months_ago = now - relativedelta(months=6)
            # This was the original qset, but it wouldn't get newer docs for the same Appeals
            # qset = Appeal.objects.filter(appealdocument__isnull=True).filter(end_date__gt=six_months_ago)
            qset = Appeal.objects.filter(end_date__gt=six_months_ago)

        # qset = Appeal.objects.filter(code='Something')  # could help debug
        # First get all Appeal Codes
        appeal_codes = [a.code for a in qset]

        # Modify code taken from https://pastebin.com/ieMe9yPc to scrape `publications-and-reports` and find
        # Documents for each appeal code
        output = []
        page_not_found = []
        for code in appeal_codes:
            code = code.replace(' ', '')
            docs_url = f'{baseurl}?appeal_code={code}'  # no more ac={code}&at=0&c=&co=&dt=1&f=&re=&t=&ti=&zo=
            try:
                http = PoolManager()
                response = http.request('GET', docs_url)
            except Exception:  # if we get an error fetching page for an appeal, we ignore it
                page_not_found.append(code)
                continue

            soup = BeautifulSoup(response.data, "lxml")
            div = soup.find('div', class_='row appeals-view__row')
            for t in div.findAll('tbody'):
                output = output + self.makelist(t)

        # Once we have all Documents in output, we add all missing Documents to the associated Appeal
        not_found = []
        existing = []
        created = []

        acodes = list(set([a['appealcode'] for a in output]))
        for code in acodes:
            try:
                appeal = Appeal.objects.get(code=code)
            except ObjectDoesNotExist:
                not_found.append(code)
                continue

            existing_docs = list(appeal.appealdocument_set.all())
            docs = [a for a in output if code == a['appealcode']]
            for doc in docs:
                if doc['url'].startswith('/'):  # can be /docs or /sites also
                    doc['url'] = f'https://www.ifrc.org{doc["url"]}'
                    # href only contains relative path to the document if it's available at the ifrc.org site
                exists = len([
                    a for a in existing_docs if a.document_url == doc['url']
                ]) > 0
                if exists:
                    existing.append(doc['url'])
                else:
                    try:
                        created_at = self.parse_date(doc['date'])
                    except Exception:
                        created_at = None

                    AppealDocument.objects.create(
                        document_url=doc['url'],
                        name=doc[
                            'appealtype'],  # not ['name'], because this is the appeal's name
                        created_at=created_at,
                        appeal=appeal,
                    )
                    created.append(doc['url'])
        text_to_log = []
        text_to_log.append('%s appeal documents created' % len(created))
        text_to_log.append('%s existing appeal documents' % len(existing))
        text_to_log.append('%s pages not found for appeal' %
                           len(page_not_found))

        for t in text_to_log:
            logger.info(t)
            # body = { "name": "ingest_appeal_docs", "message": t, "status": CronJobStatus.SUCCESSFUL }
            # CronJob.sync_cron(body)

        if len(not_found):
            t = '%s documents without appeals in system' % len(not_found)
            logger.warning(t)
            body = {
                "name": "ingest_appeal_docs",
                "message": t,
                "num_result": len(not_found),
                "status": CronJobStatus.WARNED
            }
            CronJob.sync_cron(body)

        if (joy_to_the_world):
            body = {
                "name":
                "ingest_appeal_docs",
                "message": (f'Done ingesting appeals_docs on url {baseurl},'
                            f' {len(created)} appeal document(s) were created,'
                            f' {len(existing)} already exist,'
                            f' {len(page_not_found)} not found'),
                "num_result":
                len(created),
                "status":
                CronJobStatus.SUCCESSFUL
            }
            CronJob.sync_cron(body)
Example #29
    def handle(self, *args, **options):
        logger.info('Starting Deployment ingest')

        # url = 'https://proxy.hxlstandard.org/data.json?url=https%3A%2F%2Fdocs.google.com%2Fspreadsheets%2Fd%2F1CBvledFYc_uwlvHTvJE0SYS7_mPGU2L-zhrqbB4KNIA%2Fedit%23gid%3D0&header-row=1' # not enough.
        url = 'https://proxy.hxlstandard.org/data.json?tagger-match-all=on&' \
            + 'tagger-01-header=year&' \
            + 'tagger-01-tag=%23a1&' \
            + 'tagger-02-header=%2Aappeal+code&' \
            + 'tagger-02-tag=%23a2&' \
            + 'tagger-03-header=region&' \
            + 'tagger-03-tag=%23a3&' \
            + 'tagger-04-header=country&' \
            + 'tagger-04-tag=%23a4&' \
            + 'tagger-05-header=location&' \
            + 'tagger-05-tag=%23a5&' \
            + 'tagger-06-header=disaster+type&' \
            + 'tagger-06-tag=%23a6&' \
            + 'tagger-07-header=%2Adisaster+name&' \
            + 'tagger-07-tag=%23a7&' \
            + 'tagger-08-header=%2Aname&' \
            + 'tagger-08-tag=%23a8&' \
            + 'tagger-09-header=%2Adeploying+ns+%2F+ifrc+office&' \
            + 'tagger-09-tag=%23a9&' \
            + 'tagger-10-header=%2Agender&' \
            + 'tagger-10-tag=%23b1&' \
            + 'tagger-11-header=language&' \
            + 'tagger-11-tag=%23b2&' \
            + 'tagger-12-header=%2Aposition&' \
            + 'tagger-12-tag=%23b3&' \
            + 'tagger-13-header=%2Atype&' \
            + 'tagger-13-tag=%23b4&' \
            + 'tagger-14-header=supported+by+ns&' \
            + 'tagger-14-tag=%23b5&' \
            + 'tagger-15-header=availability&' \
            + 'tagger-15-tag=%23b6&' \
            + 'tagger-16-header=%2Aexp+start+date&' \
            + 'tagger-16-tag=%23b7&' \
            + 'tagger-17-header=%2Aexp+duration&' \
            + 'tagger-17-tag=%23b8&' \
            + 'tagger-18-header=%2Aalert&' \
            + 'tagger-18-tag=%23b9&' \
            + 'tagger-19-header=deployment+message&' \
            + 'tagger-19-tag=%23c1&' \
            + 'tagger-20-header=%2Astart+of+mission&' \
            + 'tagger-20-tag=%23c2&' \
            + 'tagger-21-header=%2Aend+of+mission&' \
            + 'tagger-21-tag=%23c3&' \
            + 'tagger-22-header=deployment+duration&' \
            + 'tagger-22-tag=%23c4&' \
            + 'tagger-23-header=deployed&' \
            + 'tagger-23-tag=%23c5&' \
            + 'tagger-24-header=rotation&' \
            + 'tagger-24-tag=%23c6&' \
            + 'tagger-25-header=comments&' \
            + 'tagger-25-tag=%23c7&' \
            + 'url=https%3A%2F%2Fdocs.google.com%2Fspreadsheets%2Fd%2F1CBvledFYc_uwlvHTvJE0SYS7_mPGU2L-zhrqbB4KNIA%2Fedit%23gid%3D0&' \
            + 'header-row=1'

        response = requests.get(url)
        if response.status_code != 200:
            logger.error('Error querying Deployment HXL API')
            raise Exception('Error querying Deployment HXL API')
        records = response.json()

        # some logging variables
        not_found = []
        existing = []
        created = []

        columns = [a.replace('*', '').replace(' ', '') for a in records[0]]
        # ['Year', 'AppealCode', 'Region', 'Country', 'Location', 'Disastertype', 'Disastername', 'Name', 'DeployingNS/IFRCOffice', 'Gender', 'Language', 'Position', 'Type', 'SupportedbyNS', 'Availability', 'Expstartdate', 'expduration', 'Alert', 'Deploymentmessage', 'Startofmission', 'Endofmission', 'DeploymentDuration', 'Deployed', 'Rotation', 'Comments']
        #     0          1          2          3          4          5                    6          7          8                       9          10          11          12          13             14            15              16          17          18                    19                    20                21          22          23          24

        # if empty name -> Alert, otherwise -> Deployment

        #       OBSOLETE:

        #        # group records by appeal code
        #        acodes = list(set([a[2] for a in records[2:]]))
        #        for code in acodes:
        #            try:
        #                appeal = Appeal.objects.get(code=code)
        #            except ObjectDoesNotExist:
        #                not_found.append(code)
        #                continue
        #
        #            existing_docs = list(appeal.appealdocument_set.all())
        #            docs = [a for a in records if a[2] == code]
        #            for doc in docs:
        #                exists = len([a for a in existing_docs if a.document_url == doc[0]]) > 0
        #                if exists:
        #                    existing.append(doc[0])
        #                else:
        #                    try:
        #                        created_at = self.parse_date(doc[5])
        #                    except:
        #                        created_at = None
        #
        #                    AppealDocument.objects.create(
        #                        document_url=doc[0],
        #                        name=doc[4],
        #                        created_at=created_at,
        #                        appeal=appeal,
        #                    )
        #                    created.append(doc[0])
        logger.info('%s Deployments created' % len(created))
        logger.info('%s existing Deployments' % len(existing))
        logger.warning('%s documents without appeals in system' %
                       len(not_found))
Example #30
 def clear(self):
     logger.warning(
         f"{self.__class__.__name__} cleaning, objects in total: {len(self._db)}"
     )
     self._db.clear()