def export(table, db=None, valid=False, limit=None, path=None, format='csv', show=False):
    """
    OneForAll database export module.

    Example:
        python3 dbexport.py --table name --format csv --dir= ./result.csv
        python3 dbexport.py --db result.db --table name --show False

    Note:
        The format parameter accepts: 'txt', 'rst', 'csv', 'tsv', 'json',
        'yaml', 'html', 'jira', 'xls', 'xlsx', 'dbf', 'latex', 'ods'.
        When path is None a path inside the OneForAll result directory is
        generated automatically.

    :param str table: table to export
    :param str db: database path to export from (default results/result.sqlite3)
    :param bool valid: only export valid subdomain results (default False)
    :param str limit: export limit condition (default None)
    :param str format: export file format (default csv)
    :param str path: export file path (default None)
    :param bool show: print exported data to the terminal (default False)
    :return: exported rows as a list of dicts
    """
    database = Database(db)
    rows = database.export_data(table, valid, limit)
    # check_format may downgrade the format depending on the row count
    format = utils.check_format(format, len(rows))
    path = utils.check_path(path, table, format)
    if show:
        print(rows.dataset)
    data = rows.export(format)
    database.close()
    utils.save_data(path, data)
    logger.log('INFOR', f'{table}主域的子域结果 {path}')
    data_dict = rows.as_dict()
    return data_dict
def main(self):
    """
    Main collection workflow: collect subdomains, optionally brute-force,
    deduplicate, optionally resolve/verify them, then export and archive
    the result table.
    """
    # Fall back to configured defaults when flags were not given explicitly
    if self.brute is None:
        self.brute = config.enable_brute_module
    if self.verify is None:
        self.verify = config.enable_verify_subdomain
    rename_table = self.domain + '_last'
    collect = Collect(self.domain, export=False)
    collect.run()
    if self.brute:
        # Brute forcing issues a large number of DNS queries; running it
        # concurrently could disturb network requests of other tasks
        brute = AIOBrute(self.domain, export=False)
        brute.run()
    db = Database()
    # Keep an untouched copy of the raw collection results
    db.copy_table(self.domain, self.domain + '_ori')
    db.remove_invalid(self.domain)
    db.deduplicate_subdomain(self.domain)
    # Case: subdomain verification disabled
    if not self.verify:
        # Export from database without filtering by validity
        self.valid = None
        dbexport.export(self.domain, valid=self.valid, format=self.format, show=self.show)
        db.drop_table(rename_table)
        db.rename_table(self.domain, rename_table)
        return
    # Start subdomain verification
    self.datas = db.get_data(self.domain).as_dict()
    loop = asyncio.get_event_loop()
    asyncio.set_event_loop(loop)
    # Resolve domain addresses
    task = resolve.bulk_query_a(self.datas)
    self.datas = loop.run_until_complete(task)
    # Save resolution results
    resolve_table = self.domain + '_res'
    db.drop_table(resolve_table)
    db.create_table(resolve_table)
    db.save_db(resolve_table, self.datas, 'resolve')
    # Request the resolved addresses over HTTP
    task = request.bulk_get_request(self.datas, self.port)
    self.datas = loop.run_until_complete(task)
    # Small delay before closing the loop so underlying connections
    # get a chance to shut down cleanly
    loop.run_until_complete(asyncio.sleep(0.25))
    db.clear_table(self.domain)
    db.save_db(self.domain, self.datas)
    # Export from database
    dbexport.export(self.domain, valid=self.valid, format=self.format, show=self.show)
    db.drop_table(rename_table)
    db.rename_table(self.domain, rename_table)
    db.close()
def export(table, db=None, valid=None, path=None, format='csv', show=False):
    """
    OneForAll database export module.

    Example:
        python3 dbexport.py --table name --format csv --dir= ./result.csv
        python3 dbexport.py --db result.db --table name --show False

    Note:
        The port parameter accepts 'small', 'medium', 'large', 'xlarge';
        see config.py for details.
        The format parameter accepts: 'txt', 'rst', 'csv', 'tsv', 'json',
        'yaml', 'html', 'jira', 'xls', 'xlsx', 'dbf', 'latex', 'ods'.
        When path is None a path inside the OneForAll result directory is
        generated automatically.

    :param str table: table to export
    :param str db: database path to export from (default results/result.sqlite3)
    :param int valid: validity filter for exported subdomains (default None)
    :param str format: export file format (default csv)
    :param str path: export file path (default None)
    :param bool show: print exported data to the terminal (default False)
    """
    database = Database(db)
    rows = database.export_data(table, valid)
    format = utils.check_format(format, len(rows))
    path = utils.check_path(path, table, format)
    if show:
        print(rows.dataset)
    # 'txt' has no tablib exporter, so serialize the dataset directly
    if format == 'txt':
        data = str(rows.dataset)
    else:
        data = rows.export(format)
    database.close()
    utils.save_data(path, data)
def export(table, db=None, valid=None, dpath=None, format='csv', show=False):
    """
    OneForAll database export module.

    Example:
        python3 dbexport.py --table name --format csv --dir= ./result.csv
        python3 dbexport.py --db result.db --table name --show False

    Note:
        The port parameter accepts 'small', 'medium', 'large', 'xlarge';
        see config.py for details.
        The format parameter accepts: 'txt', 'rst', 'csv', 'tsv', 'json',
        'yaml', 'html', 'jira', 'xls', 'xlsx', 'dbf', 'latex', 'ods'.
        When dpath is None the OneForAll result directory is used.

    :param str table: table to export
    :param str db: database path to export from (default results/result.sqlite3)
    :param int valid: validity filter for exported subdomains (default None)
    :param str format: export format (default csv)
    :param str dpath: export directory (default None)
    :param bool show: print exported data to the terminal (default False)
    """
    format = utils.check_format(format)
    dpath = utils.check_dpath(dpath)
    database = Database(db)
    rows = database.export_data(table, valid)  # falls back to all subdomains on unexpected input
    if show:
        print(rows.dataset)
    # 'txt' has no tablib exporter, so serialize the dataset directly
    if format == 'txt':
        data = str(rows.dataset)
    else:
        data = rows.export(format)
    database.close()
    fpath = dpath.joinpath(f'{table}_subdomain.{format}')
    utils.save_data(fpath, data)
def deal_db(self):
    """
    Post-process the database once the collection task has finished.
    """
    database = Database()
    database.deal_table(self.domain, self.origin_table)
    database.close()
def deal_db(self):
    """
    Process the data when the collection task is completed
    """
    database = Database()
    database.deal_table(self.domain, self.origin_table)
    database.close()
def save_db(self, data):
    """
    Update each record's row in the domain table, keyed by its URL.

    :param list data: records to persist; 'url', 'cname', 'ip' and 'header'
                      are stripped before the update
    """
    database = Database()
    for record in data:
        url = record.pop('url')
        # These fields are not stored by update_data_by_url
        for key in ('cname', 'ip', 'header'):
            record.pop(key)
        database.update_data_by_url(self.domain, record, url)
    database.close()
def save_db(self):
    """
    Persist this module's results into the domain table (lock-guarded).
    """
    lock.acquire()
    database = Database()
    database.create_table(self.domain)
    result_source, result_rows = self.results
    # Store the results in the database
    database.save_db(self.domain, result_rows, result_source)
    database.close()
    lock.release()
def save_db(self):
    """
    Persist this module's results into the database (lock-guarded).
    """
    lock.acquire()
    database = Database()
    database.create_table(self.domain)
    database.save_db(self.domain, self.results, self.source)
    database.close()
    lock.release()
def save_db(self):
    """
    Save module results into the database (lock-guarded).
    """
    # Fixed: the message had a needless f-string prefix (no placeholders)
    logger.log('DEBUG', 'Saving results to database')
    lock.acquire()
    db = Database()
    db.create_table(self.domain)
    db.save_db(self.domain, self.results, self.source)
    db.close()
    lock.release()
def save_data(name, data):
    """
    Save request results to database

    :param str name: table name
    :param list data: data to be saved
    """
    database = Database()
    # Recreate the table so only the latest request results are kept
    database.drop_table(name)
    database.create_table(name)
    database.save_db(name, data, 'request')
    database.close()
def save_data(name, data):
    """
    Save resolution results to the database.

    :param str name: table name
    :param list data: data to be saved
    """
    database = Database()
    # Recreate the table so only the latest resolution results are kept
    database.drop_table(name)
    database.create_table(name)
    database.save_db(name, data, 'resolve')
    database.close()
def save_db(self):
    """
    Save module results into the database (lock-guarded).
    """
    # Fixed: the message had a needless f-string prefix (no placeholders)
    logger.log('DEBUG', '正在将结果存入到数据库')
    lock.acquire()
    db = Database()
    db.create_table(self.domain)
    db.save_db(self.domain, self.results, self.source)
    db.close()
    lock.release()
def export(target, type='target', db=None, alive=False, limit=None, path=None, format='csv', show=False):
    """
    OneForAll export from database module

    Example:
        python3 dbexport.py --target name --format csv --dir= ./result.csv
        python3 dbexport.py --db result.db --target name --show False
        python3 dbexport.py --target table_name --tb True --show False

    Note:
        --format rst/csv/tsv/json/yaml/html/jira/xls/xlsx/dbf/latex/ods (result format)
        --path   Result directory (default directory is ./results)

    :param str target: Table to be exported
    :param str type: Type of target ('target' expands domains; 'table' exports one table)
    :param str db: Database path to be exported (default ./results/result.sqlite3)
    :param bool alive: Only export the results of alive subdomains (default False)
    :param str limit: Export limit (default None)
    :param str format: Result format (default csv)
    :param str path: Result directory (default None)
    :param bool show: Displays the exported data in terminal (default False)
    :return: exported data for 'table' type; None for 'target' type
    """
    if type == 'target':
        database = Database(db)
        domains = utils.get_domains(target)
        datas = []
        if domains:
            for domain in domains:
                table_name = domain_to_table(domain)
                rows = database.export_data(table_name, alive, limit)
                # Skip domains whose table is missing or empty
                if rows is None:
                    continue
                data = export_data(format, path, rows, show, table_name, target)
                datas.extend(data)
            database.close()
            # Also write a combined export when several domains were given
            if len(domains) > 1:
                utils.export_all(alive, format, path, datas)
    elif type == 'table':
        database = Database(db)
        rows = database.export_data(target, alive, limit)
        data = export_data(format, path, rows, show, target, target)
        database.close()
        return data
def save_db(name, data, module):
    """
    Save request results to database

    :param str name: table name
    :param list data: data to be saved
    :param str module: module name
    """
    database = Database()
    # Recreate the table so only the latest results are kept
    database.drop_table(name)
    database.create_table(name)
    database.save_db(name, data, module)
    database.close()
def save_data(name, data):
    """
    Save resolution results to the database.

    :param str name: table name
    :param list data: data to be saved
    """
    # Fixed: the message had a needless f-string prefix (no placeholders)
    logger.log('INFOR', 'Saving resolved results')
    db = Database()
    db.drop_table(name)
    db.create_table(name)
    db.save_db(name, data, 'resolve')
    db.close()
def get_satellites_by_hazard_id(hazard_id_param: int):
    """
    Return a JSON list of satellites associated with the given hazard id.
    Each entry carries the satellite's id and display name.
    """
    database = Database()
    satellites: List[Satellite] = database.get_satellites_by_hazard_id(
        hazard_id=hazard_id_param)
    database.close()
    payload = []
    for satellite in satellites:
        payload.append({
            'satellite_id': satellite.get_value(),
            'satellite_name': str(satellite),
        })
    return jsonify(payload)
def export(self, table):
    """
    Export data from the database, then archive the current domain table.

    :param table: table name to export
    :return: exported data
    :rtype: list
    """
    database = Database()
    exported = dbexport.export(table, alive=self.alive, format=self.format)
    # Replace the previous snapshot with the freshly exported table
    database.drop_table(self.new_table)
    database.rename_table(self.domain, self.new_table)
    database.close()
    return exported
def export(self, table):
    """
    Export data from the database and do some follow-up processing

    :param table: table name
    :return: export data
    :rtype: list
    """
    database = Database()
    exported = dbexport.export(table, type='table', alive=self.alive,
                               format=self.format)
    # Replace the previous snapshot with the freshly exported table
    database.drop_table(self.new_table)
    database.rename_table(self.domain, self.new_table)
    database.close()
    return exported
def save(name, total, req_data, resp_queue):
    """
    Drain response items from resp_queue and insert them into table `name`,
    stopping once `total` responses have been stored.

    :param str name: table name to create and insert into
    :param int total: number of responses expected before finishing
    :param req_data: original request records, indexed by the queue items
    :param resp_queue: queue of (index, response) pairs
    """
    db = Database()
    db.create_table(name)
    i = 0
    # NOTE(review): this busy-waits when the queue is empty; a blocking
    # resp_queue.get() would avoid spinning — confirm producer behavior first
    while True:
        if not resp_queue.empty():
            i += 1
            index, resp = resp_queue.get()
            old_info = req_data[index]
            new_info = gen_new_info(old_info, resp)
            db.insert_table(name, new_info)
            resp_queue.task_done()
        if i >= total:  # Only finish after all request results are stored
            break
    db.close()
def save(name, total, req_data, resp_queue):
    """
    Drain response items from resp_queue and insert them into table `name`,
    stopping once `total` responses have been stored.

    :param str name: table name to create and insert into
    :param int total: number of responses expected before finishing
    :param req_data: original request records, indexed by the queue items
    :param resp_queue: queue of (index, response) pairs
    """
    db = Database()
    db.create_table(name)
    i = 0
    # NOTE(review): this busy-waits when the queue is empty; a blocking
    # resp_queue.get() would avoid spinning — confirm producer behavior first
    while True:
        if not resp_queue.empty():
            i += 1
            index, resp = resp_queue.get()
            old_info = req_data[index]
            new_info = gen_new_info(old_info, resp)
            db.insert_table(name, new_info)
            resp_queue.task_done()
        if i >= total:  # You have to save all the request results to end
            break
    db.close()
def mark(self):
    """
    Mark newly discovered subdomains against the previous run's results.

    :return: marked subdomain data
    :rtype: list
    """
    database = Database()
    previous_data = []
    current_data = database.get_data(self.domain).as_dict()
    # Database pre-processing when this is not the first collection run
    if database.exist_table(self.new_table):
        # Drop any stale old-result table, then rotate new -> old
        database.drop_table(self.old_table)
        database.rename_table(self.new_table, self.old_table)
        previous_data = database.get_data(self.old_table).as_dict()
    database.close()
    return utils.mark_subdomain(previous_data, current_data)
def get_hazard_data(hazard_type_param: str, hazard_id_param: str):
    """
    Return the JSON hazard record for the given hazard type and id.

    Responds 404 when the hazard type is unknown or when no hazard with the
    given id exists.
    """
    try:
        HazardType.from_string(hazard_type_param)
    except ValueError:
        # send back an exception
        abort(404, "Hazard Type {0} does not exist.".format(hazard_type_param))

    db = Database()
    hazard = db.get_hazard_info_by_hazard_id(hazard_id_param)
    db.close()

    if not hazard:
        # BUG FIX: `hazard` is falsy here, so the original
        # `hazard.hazard_id` raised AttributeError instead of aborting;
        # use the requested id from the parameter instead.
        abort(404, 'Hazard with id {0} does not exist'.format(hazard_id_param))

    data = parse_hazard_data_from_db(hazard)
    return jsonify(data)
def export_data(target, db=None, alive=False, limit=None, path=None, fmt='csv', show=False):
    """
    Garuda export from database module

    Example:
        python3 export.py --target name --fmt csv --dir= ./result.csv
        python3 export.py --target name --tb True --show False
        python3 export.py --db result.db --target name --show False

    Note:
        --fmt csv/json (result format)
        --path Result directory (default directory is ./results)

    :param str target: Table to be exported
    :param str db: Database path to be exported (default ./results/result.sqlite3)
    :param bool alive: Only export the results of alive subdomains (default False)
    :param str limit: Export limit (default None)
    :param str fmt: Result format (default csv)
    :param str path: Result directory (default None)
    :param bool show: Displays the exported data in terminal (default False)
    :return: list of exported records across all matched domains
    """
    database = Database(db)
    domains = utils.get_domains(target)
    datas = list()
    if domains:
        for domain in domains:
            # Table names replace dots with underscores
            table_name = domain.replace('.', '_')
            rows = database.export_data(table_name, alive, limit)
            # Skip domains whose table is missing or empty
            if rows is None:
                continue
            data, _, _ = do_export(fmt, path, rows, show, domain, target)
            datas.extend(data)
        database.close()
        # Also write a combined export when several domains were given
        if len(domains) > 1:
            utils.export_all(alive, fmt, path, datas)
    return datas
def get_hazards(hazard_type_param: str):
    """
    Return a JSON summary of all hazards of the requested type.
    Responds 400 when the hazard type string is unknown.
    """
    print("Get Hazards Summary")
    try:
        hazard_type = HazardType.from_string(hazard_type_param)
    except ValueError:
        # send back an exception
        abort(400, "Hazard Type {0} does not exist.".format(hazard_type_param))
        return
    database = Database()
    rows = database.get_hazards_by_type(hazard_type)
    database.close()
    summary = parse_hazard_summary_info_from_db(rows, hazard_type)
    print(summary)
    return jsonify(summary)
def mark(self):
    """
    Mark the new discovered subdomain

    :return: marked data
    :rtype: list
    """
    database = Database()
    previous_data = list()
    current_data = database.get_data(self.domain).as_dict()
    # Database pre-processing when it is not the first time to collect this subdomain
    if database.exist_table(self.new_table):
        # Drop any stale old-result table, then rotate new -> old
        database.drop_table(self.old_table)
        database.rename_table(self.new_table, self.old_table)
        previous_data = database.get_data(self.old_table).as_dict()
    database.close()
    return utils.mark_subdomain(previous_data, current_data)
def export(table, db=None, alive=False, limit=None, path=None, format='csv', show=False):
    """
    OneForAll export from database module

    Example:
        python3 dbexport.py --table name --format csv --dir= ./result.csv
        python3 dbexport.py --db result.db --table name --show False

    Note:
        --alive  True/False Only export alive subdomains or not (default False)
        --format rst/csv/tsv/json/yaml/html/jira/xls/xlsx/dbf/latex/ods (result format)
        --path   Result directory (default directory is ./results)

    :param str table: Table to be exported
    :param str db: Database path to be exported (default ./results/result.sqlite3)
    :param bool alive: Only export the results of alive subdomains (default False)
    :param str limit: Export limit (default None)
    :param str format: Result format (default csv)
    :param str path: Result directory (default None)
    :param bool show: Displays the exported data in terminal (default False)
    :return: exported rows as a list of dicts
    """
    database = Database(db)
    rows = database.export_data(table, alive, limit)
    # check_format may downgrade the format depending on the row count
    format = utils.check_format(format, len(rows))
    path = utils.check_path(path, table, format)
    if show:
        print(rows.dataset)
    data = rows.export(format)
    database.close()
    utils.save_data(path, data)
    logger.log('ALERT', f'The subdomain result for {table}: {path}')
    data_dict = rows.as_dict()
    return data_dict
def main(self):
    """
    Main collection workflow: collect subdomains, optionally brute-force,
    deduplicate, mark new discoveries, optionally resolve and request them,
    export the results, and finally run a subdomain-takeover check.
    """
    # Fall back to configured defaults when flags were not given explicitly
    if self.brute is None:
        self.brute = config.enable_brute_module
    if self.dns is None:
        self.dns = config.enable_dns_resolve
    if self.req is None:
        self.req = config.enable_http_request
    old_table = self.domain + '_last_result'
    new_table = self.domain + '_now_result'
    collect = Collect(self.domain, export=False)
    collect.run()
    if self.brute:
        # Brute forcing issues a large number of DNS queries; running it
        # concurrently could disturb network requests of other tasks
        brute = AIOBrute(self.domain, export=False)
        brute.run()
    db = Database()
    # Keep an untouched copy of the raw collection results
    original_table = self.domain + '_original_result'
    db.copy_table(self.domain, original_table)
    db.remove_invalid(self.domain)
    db.deduplicate_subdomain(self.domain)
    old_data = []
    # Database pre-processing when this is not the first collection run
    if db.exist_table(new_table):
        db.drop_table(old_table)  # Drop any stale old-result table first
        db.rename_table(new_table, old_table)  # Rotate new -> old
        old_data = db.get_data(old_table).as_dict()
    # Export directly without resolving subdomains
    if not self.dns:
        # Export from database
        dbexport.export(self.domain, valid=self.valid, format=self.format, show=self.show)
        db.drop_table(new_table)
        db.rename_table(self.domain, new_table)
        db.close()
        return
    self.data = db.get_data(self.domain).as_dict()
    # Mark newly discovered subdomains
    self.data = utils.mark_subdomain(old_data, self.data)
    # Obtain the event loop
    loop = asyncio.get_event_loop()
    asyncio.set_event_loop(loop)
    # Resolve subdomains
    task = resolve.bulk_resolve(self.data)
    self.data = loop.run_until_complete(task)
    # Save resolution results
    resolve_table = self.domain + '_resolve_result'
    db.drop_table(resolve_table)
    db.create_table(resolve_table)
    db.save_db(resolve_table, self.data, 'resolve')
    # Export directly without requesting subdomains
    if not self.req:
        # Export from database
        dbexport.export(resolve_table, valid=self.valid, format=self.format, show=self.show)
        db.drop_table(new_table)
        db.rename_table(self.domain, new_table)
        db.close()
        return
    # Request subdomains over HTTP
    task = request.bulk_request(self.data, self.port)
    self.data = loop.run_until_complete(task)
    self.datas.extend(self.data)
    # Small delay before closing the loop so underlying connections
    # get a chance to shut down cleanly
    loop.run_until_complete(asyncio.sleep(0.25))
    count = utils.count_valid(self.data)
    logger.log('INFOR', f'经验证{self.domain}有效子域{count}个')
    # Save request results
    db.clear_table(self.domain)
    db.save_db(self.domain, self.data, 'request')
    # Export from database
    dbexport.export(self.domain, valid=self.valid, format=self.format, show=self.show)
    db.drop_table(new_table)
    db.rename_table(self.domain, new_table)
    db.close()
    # Subdomain takeover check
    if self.takeover:
        subdomains = set(map(lambda x: x.get('subdomain'), self.data))
        takeover = Takeover(subdomains)
        takeover.run()
def run():
    """
    1. Read in image file
    2. Pull summary statistics from file
    3. Compress image
    4. Pad image as appropriate
    5. Add date onto image
    6. Save images locally
    7. Push images to S3 Bucket
    8. Remove local copies of images
    9. Create appropriate objects
    10. Push new object references to database
    """
    today = datetime.now().strftime("%Y-%m-%d")
    log_file = os.path.abspath("logs/{}.log".format(today))
    logger = RsmasLogger("pipeline", log_file)

    images = get_list_of_images()
    # LOG: list of images to process
    logger.log(loglevel.INFO, [img.key for img in images])

    for im in images:
        logger.log(loglevel.INFO, "Processing image: {}".format(im.key))

        file_path = "{}/{}".format(im.bucket_name, im.key)
        # Path templates; filled in with img_date / ids below
        full_path = "{}_full.jpg"
        mod_path = "{}_mod.jpg"
        aws_path = "{}/{}/{}/{}"

        try:
            haz_id, haz_name, sat_name, sat_dir, img_type, img_date, center = summary.pull_summary_data(
                "/vsis3/{}".format(file_path))
            sat_id = Satellite.from_params(sat_name, bool(sat_dir))
        # NOTE(review): bare except swallows every error type here,
        # including KeyboardInterrupt — consider `except Exception`
        except:
            # LOG: error in image metadata format
            logger.log(
                loglevel.ERROR,
                '\tThere was an error in the metadata format of the image. Skipping.'
            )
            continue

        aws_path = aws_path.format(haz_id, sat_id, img_type, img_date)
        full_path = full_path.format(img_date)
        mod_path = mod_path.format(img_date)

        # 1. Read in image file
        with rasterio.open("s3://{}".format(file_path)) as data:
            band = data.read(1)
            img = plot.show(band)
            img.get_figure().savefig(full_path, dpi=300)

        # 3. Compress image
        compressed = immanip.compress_image(full_path, compression_amount=0.3)

        # 4 - 5. Pad image and add date on image
        text_image = immanip.add_text_to_image(compressed, img_date)

        # 6. Save image locally
        # NOTE(review): mod_path was already formatted above, so this second
        # .format(img_date) is a no-op on a string with no placeholders left
        text_image.save(mod_path.format(img_date))
        mod_path_aws = save.get_s3_url("{}/{}".format(aws_path, mod_path))
        full_path_aws = save.get_s3_url("{}/{}".format(aws_path, full_path))
        tif_path_aws = save.get_s3_url("{}/{}".format(aws_path, im.key))

        # LOG: images successfully moved to S3 bucket
        # LOG: mod_path_aws, full_path_aws, tif_path_aws

        # 9. Create appropriate objects
        hazard = Hazard(haz_id, haz_name, HazardType.VOLCANO,
                        Location(center[0], center[1]), Date(img_date), 0)
        satellite = Satellite.from_params(sat_name, bool(sat_dir))
        image = Image(str(randint(1, 10000000)), haz_id, satellite,
                      ImageType.from_string(img_type), Date(img_date),
                      ImageURL(full_path_aws), ImageURL(tif_path_aws),
                      ImageURL(mod_path_aws))

        try:
            db = Database()
        except ConnectionError:
            logger.log(
                loglevel.ERROR,
                "\tThere was an error while connecting to the database. Skipping this image."
            )
            continue

        # 10. Push new object references to database
        db.create_new_hazard(hazard)
        db.create_new_satellite(satellite)
        db.create_new_image(image)
        db.close()

        # LOG: database successfully updated
        logger.log(loglevel.INFO, "\tDatabase succesfully updated.")

        # 7. Push images to S3 Bucket
        save.save_image_s3(mod_path, "{}/{}".format(aws_path, mod_path))
        save.save_image_s3(full_path, "{}/{}".format(aws_path, full_path))
        save.move_tif(im.key, "{}/{}".format(aws_path, im.key))

        logger.log(loglevel.INFO,
                   "\tImages were successfully uploaded to the S3 bucket")
        logger.log(loglevel.INFO, "\t\tmod_path_aws: {}".format(mod_path_aws))
        logger.log(loglevel.INFO, "\t\tfull_path_aws: {}".format(full_path_aws))
        logger.log(loglevel.INFO, "\t\ttif_path_aws: {}".format(tif_path_aws))

        # LOG: image completed
        logger.log(loglevel.INFO, "\tProcessing of {} completed.".format(im.key))

    # LOG: finished processing images
    logger.log(loglevel.INFO, "Processing complete.")
def export(table, db=None, valid=None, dpath=None, format='xls', show=False):
    """
    OneForAll database export module.

    Example:
        python3 dbexport.py --table name --format csv --dir= ./result.csv
        python3 dbexport.py --db result.db --table name --show False

    Note:
        The port parameter accepts 'small', 'medium', 'large', 'xlarge';
        see config.py for details.
        The format parameter accepts: 'txt', 'rst', 'csv', 'tsv', 'json',
        'yaml', 'html', 'jira', 'xls', 'xlsx', 'dbf', 'latex', 'ods'.
        When dpath is None the OneForAll result directory is used.

    :param str table: table to export
    :param str db: database path to export from (default results/result.sqlite3)
    :param int valid: validity filter for exported subdomains (default None)
    :param str format: export format (default xls)
    :param str dpath: export directory (default None)
    :param bool show: print exported data to the terminal (default False)
    """
    formats = [
        'txt', 'rst', 'csv', 'tsv', 'json', 'yaml', 'html', 'jira', 'xls',
        'xlsx', 'dbf', 'latex', 'ods'
    ]
    if format not in formats:
        logger.log('FATAL', f'不支持{format}格式导出')
        return
    if dpath is None:
        dpath = config.result_save_path
    if isinstance(dpath, str):
        dpath = Path(dpath)
    # BUG FIX: the existence check must run before the is_dir check —
    # previously a non-existent path failed is_dir() and aborted, so the
    # mkdir branch below was unreachable.
    if not dpath.exists():
        logger.log('ALERT', f'不存在{dpath}将会新建此目录')
        dpath.mkdir(parents=True, exist_ok=True)
    if not dpath.is_dir():
        logger.log('FATAL', f'{dpath}不是目录')
        return
    database = Database(db)
    if valid is None:
        rows = database.get_data(table)
    elif isinstance(valid, int):
        rows = database.get_subdomain(table, valid)
    else:
        rows = database.get_data(table)  # Unexpected value: export all subdomains
    if show:
        print(rows.dataset)
    # 'txt' has no tablib exporter, so serialize the dataset directly
    if format == 'txt':
        data = str(rows.dataset)
    else:
        data = rows.export(format)
    database.close()
    fpath = dpath.joinpath(f'{table}.{format}')
    try:
        # Text formats write as UTF-8; binary formats fall through below
        with open(fpath, 'w', encoding="utf-8", newline='') as file:
            file.write(data)
            logger.log('INFOR', '成功完成导出')
            logger.log('INFOR', fpath)
    except TypeError:
        # data is bytes (e.g. xls/xlsx/dbf/ods) — rewrite in binary mode
        with open(fpath, 'wb') as file:
            file.write(data)
            logger.log('INFOR', '成功完成导出')
            logger.log('INFOR', fpath)
    except Exception as e:
        logger.log('ERROR', e)