def read_ad_data(account_id, campaign_id, ad_name_key=''):
    """Read per-day ad insight data for one account and export it to Excel.

    Args:
        account_id: Account whose insight files are read.
        campaign_id: Campaign used to filter the per-day insight rows.
        ad_name_key: Optional ad-name key to group by.  When non-blank, the
            grouped data is exported directly with the key embedded in the
            file name; when blank (the default, preserving the original
            behavior) the export is keyed by NEW_FIELD_GROUP_AD_NAME.
            NOTE(review): in the original code this was a hard-coded local
            ``''``, which made the non-blank branch unreachable; it is now a
            parameter so that branch can actually be exercised.
    """
    path_manager = EnvManager()
    insight_dir = path_manager.get_insight_dir(mconstant.NODE_TYPE_AD)
    account_dir = os.path.join(insight_dir, account_id)
    config_path = path_manager.get_conf_dir()
    output_path = os.path.join(path_manager.get_output_dir(),
                               mconstant.NODE_TYPE_AD)
    fhelper.make_dir(output_path)
    handler_factory = HandlerFactory(config_path)
    insight_handler = handler_factory.get_insight_handler(
        mconstant.NODE_TYPE_AD)
    in_act_files = fhelper.get_file_list(account_dir)
    data_per_day = read_ad_insight_day(in_act_files, insight_handler,
                                       campaign_id)
    group_data = group_by_name(data_per_day, ad_name_key)
    # time.time() is embedded so repeated exports never overwrite each other.
    if ad_name_key.strip():
        file_name = account_id + '_' + ad_name_key + '_' + str(
            time.time()) + '.xlsx'
        act_file = os.path.join(output_path, file_name)
        ExcelExporter.export_data_excel(group_data, act_file)
    else:
        file_name = account_id + '_' + str(time.time()) + '.xlsx'
        act_file = os.path.join(output_path, file_name)
        ExcelExporter.export_excel_by_key(iconstant.NEW_FIELD_GROUP_AD_NAME,
                                          iconstant.NEW_FIELD_GROUP_AD_NAME,
                                          group_data, act_file)
def merger_all_account_data(node_type):
    """Merge per-day insight and retention data for every account under
    *node_type* and export the merged result into one directory per account.

    Accounts are discovered from the retention tree; each account's insight
    and retention files are merged day-by-day and written under
    ``<output>/<node_type>/<account_id>``.
    """
    env = EnvManager()
    insight_root = env.get_insight_dir(node_type)
    retention_root = env.get_retention_dir(node_type)
    merger_handler = MergerDataHandler(env.get_conf_dir())
    out_root = os.path.join(env.get_output_dir(), node_type)
    fhelper.make_dir(out_root)
    for account_id in fhelper.get_subdir_name_list(retention_root):
        insight_files = fhelper.get_file_list(
            os.path.join(insight_root, account_id))
        retention_files = fhelper.get_file_list(
            os.path.join(retention_root, account_id))
        merged = merger_handler.merger_in_re_per_day(insight_files,
                                                     retention_files,
                                                     node_type)
        account_out = os.path.join(out_root, account_id)
        fhelper.make_dir(account_out)
        ExcelExporter.export_merger_retention(dhelper.get_key_id(node_type),
                                              dhelper.get_key_id(node_type),
                                              merged, account_out)
def read_all_account_data(node_type):
    """Read per-day insight data for every account under *node_type* and
    write one timestamped ``.xlsx`` file per account into the output dir."""
    env = EnvManager()
    insight_root = env.get_insight_dir(node_type)
    out_root = os.path.join(env.get_output_dir(), node_type)
    fhelper.make_dir(out_root)
    insight_handler = HandlerFactory(
        env.get_conf_dir()).get_insight_handler(node_type)
    for account_id in fhelper.get_subdir_name_list(insight_root):
        account_files = fhelper.get_file_list(
            os.path.join(insight_root, account_id))
        # Timestamp in the name keeps successive runs from overwriting.
        target_file = os.path.join(
            out_root, account_id + '_' + str(time.time()) + '.xlsx')
        read_insight_per_day(account_files, insight_handler, target_file,
                             node_type, account_id)
class InsightReader:
    """Load one account's insight files and expose daily / hourly views.

    Data for each requested node type (campaign / adset / ad by default) is
    read once at construction time and cached; ``read_daily_data`` and
    ``read_hourly_data`` then filter the cached frames by date range.
    """

    # Pre-compiled once instead of re-parsed per file.  Raw string fixes the
    # invalid '\d' escape sequences of the original pattern literal.
    _DATE_RE = re.compile(r'\d+-\d+-\d+')

    def __init__(self, account_id, handler_factory, start_date='',
                 end_date='', node_types=None, insight_path=''):
        """Read and cache all insight data for *account_id*.

        Args:
            account_id: Account whose insight directory is read.
            handler_factory: Factory yielding per-node-type insight handlers.
            start_date: Inclusive lower date bound ('YYYY-MM-DD' string
                comparison); '' disables the lower bound.
            end_date: Inclusive upper date bound; defaults to today when
                empty.
            node_types: Node types to load; defaults to campaign/adset/ad.
            insight_path: Optional override root passed to the path manager.
        """
        self._path_manager = EnvManager()
        self._handler_factory = handler_factory
        self._current_act_id = account_id
        self._all_data = {}      # node_type -> concatenated insight frame
        self._all_handler = {}   # node_type -> handler used to read it
        self._start_date = start_date
        self._end_date = end_date
        if not end_date:
            self._end_date = mhelper.get_now_date()
        if node_types:
            self._node_type = node_types
        else:
            self._node_type = [
                mconstant.NODE_TYPE_CAMPAIGN, mconstant.NODE_TYPE_ADSET,
                mconstant.NODE_TYPE_AD
            ]
        self._insight_path = insight_path
        self._read_all_data()

    def read_daily_data(self, node_type, start_date='', end_date=''):
        """Return cached daily data for *node_type* restricted to
        ``[start_date, end_date]`` (inclusive, compared as strings on the
        index), or ``None`` when the node type was not loaded."""
        if node_type not in self._all_handler:
            return None
        if node_type not in self._all_data:
            return None
        handler = self._all_handler[node_type]
        insight_data = self._all_data[node_type]
        data_per_day = handler.get_insight_data_per_day(insight_data)
        if not end_date:
            end_date = self._end_date
        index_mask = np.logical_and(data_per_day.index >= start_date,
                                    data_per_day.index <= end_date)
        return data_per_day[index_mask]

    def read_hourly_data(self, node_type, start_date='', end_date=''):
        """Return hourly increment data for *node_type* restricted to
        ``[start_date, end_date]`` (inclusive, compared as ``date`` objects),
        or ``None`` when the node type was not loaded."""
        if node_type not in self._all_handler:
            return None
        if node_type not in self._all_data:
            return None
        handler = self._all_handler[node_type]
        insight_data = self._all_data[node_type]
        increment_data = handler.read_increment_by_data(insight_data)
        hour_data = handler.get_increment_per_hour(increment_data)
        if not end_date:
            end_date = self._end_date
        start_object = mhelper.convert_str_date(start_date, '%Y-%m-%d').date()
        end_object = mhelper.convert_str_date(end_date, '%Y-%m-%d').date()
        index_mask = np.logical_and(hour_data.index.date >= start_object,
                                    hour_data.index.date <= end_object)
        return hour_data[index_mask]

    def _read_all_data(self):
        # Read every requested node type once; cache both the concatenated
        # frame and the handler that knows how to interpret it.
        for ntype in self._node_type:
            data_handler = self._handler_factory.get_insight_handler(ntype)
            file_list = self._get_file_list(ntype)
            insight_datas = [
                data_handler.read_insight_data(file_path,
                                               self._current_act_id)
                for file_path in file_list
            ]
            if not insight_datas:
                continue
            self._all_data[ntype] = pd.concat(insight_datas)
            self._all_handler[ntype] = data_handler

    def _get_file_list(self, node_type):
        # List this account's files for *node_type*, keeping only those
        # whose embedded date falls inside the configured range.
        node_dir = self._path_manager.get_insight_dir(node_type,
                                                      self._insight_path)
        account_dir = ospath.join(node_dir, self._current_act_id)
        return [
            path for path in fhelper.get_file_list(account_dir)
            if self._filter_file(path)
        ]

    def _filter_file(self, file_path):
        # Keep files whose basename contains a date inside
        # [_start_date, _end_date]; files with no recognizable date are
        # always kept ('' as _start_date disables the lower bound).
        search_date = self._DATE_RE.search(ospath.basename(file_path))
        if not search_date:
            return True
        date_dir = search_date.group()
        return self._start_date <= date_dir <= self._end_date