def gen_xls_data(book_name, sheet_name):
    """Load rows from one sheet of an .xls workbook under HOMEPATH/'xls_case'.

    :param book_name: workbook base name; '.xls' is appended here.
    :param sheet_name: exact title of the sheet to read.
    :return: an iterator of `parse`d row dicts (via ``map``), or an empty
        list when no sheet with that title exists.
        NOTE(review): the two branches return different types (iterator vs
        list) — callers presumably only iterate; confirm before unifying.
    """
    book_name += '.xls'
    book_path = HOMEPATH / 'xls_case' / book_name
    # Context manager fixes the original's file-handle leak
    # (open(...).read() was never closed).
    with open(str(book_path), 'rb') as fh:
        book = Databook().load('xls', fh.read())
    for sheet in book.sheets():
        if sheet.title == sheet_name:
            ret = json.loads(sheet.json)
            return map(parse, ret)
    return []
def get_IRM_report_data_set(input_file, sheet_name):
    """Return the sheet whose (stripped) title equals ``sheet_name``.

    :param input_file: path to an .xlsx workbook.
    :param sheet_name: sheet title to look up (compared after .strip()).
    :return: the matching tablib dataset.
    :raises SystemExit: via sys.exit when no sheet matches.
    """
    print("Reading from file {}...".format(input_file))
    # Context manager fixes the original's file-handle leak.
    with open(input_file, 'rb') as fh:
        data_book = Databook().load('xlsx', fh.read())
    sheets = data_book.sheets()
    try:
        return next(data_set for data_set in sheets
                    if data_set.title.strip() == sheet_name)
    except StopIteration:
        sys.exit('Sheet not found in xlsx file.')
def gen_data(book_name, sheet_name):
    """Load rows from one sheet of an .xls workbook under HOMEPATH/'datas'.

    :param book_name: workbook base name; '.xls' is appended here.
    :param sheet_name: sheet title, possibly prefixed — only the part after
        the first underscore is used for matching.
    :return: an iterator of `parse`d rows, or [] when the sheet is missing.
    """
    book_name += '.xls'
    # Keep only the portion after the first underscore
    # (e.g. 'case_sheet1' -> 'sheet1').
    sheet_name = sheet_name.partition('_')[-1]
    book_path = HOMEPATH / 'datas' / book_name
    # Context manager fixes the original's file-handle leak.
    with open(str(book_path), 'rb') as fh:
        dbook = Databook().load('xls', fh.read())
    for sheet in dbook.sheets():
        if sheet.title == sheet_name:
            ret = json.loads(sheet.json)
            return map(parse, ret)
    return []
def export(self, task=None):
    """
    Export the resources to a file.

    :param task: optional celery task. If given, the task state will be
        updated (progress is passed down via ``task_meta``).
    :return: a Databook with one sheet per resource.
    """
    book = Databook()
    export_kwargs = {}
    if task is not None:
        # Pre-compute the grand total so each resource's export can report
        # progress as done/total.
        total = sum([
            resource.get_queryset().count()
            for resource in self.resources
        ])
        export_kwargs['task_meta'] = {
            'task': task,
            'total': total,
            'done': 0
        }
    # Snapshot the DB query count so per-resource query usage can be logged
    # (requires DEBUG=True for connection.queries to be populated).
    num_queries_start = len(connection.queries)
    for resource in self.resources:
        model = resource.Meta.model
        logger.debug('Export kwargs: %s' % export_kwargs)
        dataset = resource.export(
            **export_kwargs
        )  # takes optional queryset argument (select related)
        # Capture the count BEFORE the logger.info below runs another
        # COUNT query, so the delta reflects only the export itself.
        len_queries = len(connection.queries)
        queries = len_queries - num_queries_start
        logger.info('Number of objects: %d' % resource.get_queryset().count())
        logger.info('Executed %d queries' % queries)
        num_queries_start = len_queries
        if task is not None:
            # Accumulate finished rows for progress reporting.
            export_kwargs['task_meta']['done'] += dataset.height
        if resource.title is not None:
            dataset.title = force_text(
                resource.title)[:31]  # maximum of 31 chars int title
        else:
            dataset.title = u'{name} ({app}.{model})'.format(
                name=model._meta.verbose_name_plural,
                app=model._meta.app_label,
                model=model.__name__)[:31]  # maximum of 31 chars int title
        book.add_sheet(dataset)
    return book
def generate_fieldxls(self):
    """Write an .xls workbook with one sheet per field's schedule.

    :return: single-element list ``[{'path': <relative xls path>}]``.
    """
    headers = [
        'Game Date', 'Day', 'Time', 'Division', 'Home', 'Visitor', 'Venue'
    ]
    datasheet_list = list()
    for fieldinfo in self.fieldinfo_list:
        field_name = fieldinfo['field_name']
        field_id = fieldinfo['field_id']
        datasheet = Dataset(title=field_name)
        datasheet.headers = list(headers)
        match_list = self.sdbinterface.get_schedule('field_id',
                                                    field_id=field_id)
        # Convert game dates to weekday abbreviations and 24-hour start
        # times to am/pm.
        tabformat_list = [
            (x['game_date'], parser.parse(x['game_date']).strftime("%a"),
             datetime.strptime(x['start_time'], "%H:%M").strftime("%I:%M%p"),
             x['div_age'] + x['div_gen'], x['home'], x['away'], field_name)
            for x in match_list
        ]
        for tabformat in tabformat_list:
            datasheet.append(tabformat)
        datasheet_list.append(datasheet)
    book = Databook(datasheet_list)
    bookname_xls_relpath = self.schedcol_name + "_byField.xls"
    bookname_xls_fullpath = os.path.join(self.dir_path, bookname_xls_relpath)
    # The with-statement closes the file; the original's explicit
    # f.close() inside the block was redundant and has been removed.
    with open(bookname_xls_fullpath, 'wb') as f:
        f.write(book.xls)
    return [{'path': bookname_xls_relpath}]
def import_snapshot(file, irm_snapshot_id):
    """Import the first sheet of an IRM xlsx file.

    Ensures a ProvInfraProject row exists for every non-empty project id,
    tags each row with the snapshot id, then runs the import resource.

    :param file: xlsx file contents to load.
    :param irm_snapshot_id: id written into the ``irm_snapshot`` column.
    :return: the resource's import result.
    """
    workbook = Databook().load(file, "xlsx")
    first_sheet = workbook.sheets()[0]
    cleaned = preprocess(first_sheet)
    # Make sure a project record exists for every non-empty project id.
    for project_id in cleaned["Project ID"]:
        if not project_id:
            continue
        models.ProvInfraProject.objects.get_or_create(
            IRM_project_id=project_id)
    row_count = len(cleaned)
    if row_count > 0:
        cleaned.append_col([irm_snapshot_id] * row_count,
                           header="irm_snapshot")
    return ProvInfraProjectSnapshotResource().import_data(cleaned)
def generate_divxls(self, genxls_id):
    """Write an .xls workbook with one sheet per division's schedule.

    :param genxls_id: key used both to read the division id and as the
        schedule query type.
    :return: single-element list ``[{'path': <relative xls path>}]``.
    """
    headers = [
        'Game Date', 'Day', 'Time', 'Division', 'Home', 'Visitor', 'Venue'
    ]
    datasheet_list = list()
    for divinfo in self.divinfo_list:
        div_id = divinfo[genxls_id]
        div_age = divinfo['div_age']
        div_gen = divinfo['div_gen']
        div_str = div_age + div_gen
        datasheet = Dataset(title=div_str)
        datasheet.headers = list(headers)
        match_list = self.sdbinterface.get_schedule(genxls_id,
                                                    div_age=div_age,
                                                    div_gen=div_gen)
        # note conversions for time from 24-hour to am/pm format
        tabformat_list = [
            (x['game_date'], parser.parse(x['game_date']).strftime("%a"),
             datetime.strptime(x['start_time'], "%H:%M").strftime("%I:%M%p"),
             div_str, y['home'], y['away'],
             self.fieldinfo_list[self.findexerGet(
                 y['venue'])]['field_name']) for x in match_list
            for y in x['gameday_data']
        ]
        for tabformat in tabformat_list:
            datasheet.append(tabformat)
        datasheet_list.append(datasheet)
    book = Databook(datasheet_list)
    bookname_xls_relpath = self.schedcol_name + "_byDiv.xls"
    bookname_xls_fullpath = os.path.join(self.dir_path, bookname_xls_relpath)
    # The with-statement closes the file; the original's explicit
    # f.close() inside the block was redundant and has been removed.
    with open(bookname_xls_fullpath, 'wb') as f:
        f.write(book.xls)
    return [{'path': bookname_xls_relpath}]
def test_kwargs_forwarded(self, resource_for_model, mocked_export):
    """
    Test that kwargs passed to `export` are forwarded to the ModelResource
    for export.
    """
    from exportdb.tasks import export

    # Stub the resource export with a minimal one-sheet book.
    stub_book = Databook()
    stub_book.add_sheet(Dataset())
    mocked_export.return_value = stub_book

    extra = {'foo': 'bar', 'baz': 10}
    export(Exporter, **extra)

    # Each exportable model should have received the same forwarded kwargs.
    for index, export_model in enumerate(get_export_models()):
        self.assertEqual(
            resource_for_model.call_args_list[index],
            mock.call(export_model, foo='bar', baz=10)
        )
def export(self, task=None):
    """
    Export the resources to a file.

    :param task: optional celery task. If given, the task state will be updated.
    :return: a Databook holding one sheet per resource.
    """
    book = Databook()
    export_kwargs = {}
    if task is not None:
        # Grand total up front so progress can be reported as done/total.
        grand_total = sum(
            res.get_queryset().count() for res in self.resources
        )
        export_kwargs['task_meta'] = {
            'task': task,
            'total': grand_total,
            'done': 0,
        }

    # Baseline query count; deltas are logged per resource.
    queries_before = len(connection.queries)
    for resource in self.resources:
        model = resource.Meta.model
        logger.debug('Export kwargs: %s' % export_kwargs)
        # takes optional queryset argument (select related)
        dataset = resource.export(**export_kwargs)

        # Capture before the COUNT query issued by the log line below.
        queries_after = len(connection.queries)
        logger.info('Number of objects: %d' % resource.get_queryset().count())
        logger.info('Executed %d queries' % (queries_after - queries_before))
        queries_before = queries_after

        if task is not None:
            export_kwargs['task_meta']['done'] += dataset.height

        # Sheet titles are capped at 31 characters.
        if resource.title is not None:
            dataset.title = force_text(resource.title)[:31]
        else:
            dataset.title = u'{name} ({app}.{model})'.format(
                name=model._meta.verbose_name_plural,
                app=model._meta.app_label,
                model=model.__name__
            )[:31]
        book.add_sheet(dataset)
    return book
def import_snapshot(snapshot):
    """Import the first sheet of the snapshot's attached xlsx file.

    Ensures an InfraProject row exists per non-empty project id, tags each
    row with the snapshot id and sphere slug, then runs the import resource.

    :param snapshot: object exposing ``file``, ``id`` and ``sphere.slug``.
    :return: the resource's import result.
    """
    contents = snapshot.file.read()
    workbook = Databook().load(contents, "xlsx")
    cleaned = preprocess(workbook.sheets()[0])
    # Make sure a project record exists for every non-empty project id.
    for project_id in cleaned["Project ID"]:
        if not project_id:
            continue
        models.InfraProject.objects.get_or_create(
            IRM_project_id=project_id,
            sphere_slug=snapshot.sphere.slug)
    row_count = len(cleaned)
    if row_count > 0:
        cleaned.append_col([snapshot.id] * row_count,
                           header="irm_snapshot")
        cleaned.append_col([snapshot.sphere.slug] * row_count,
                           header="sphere_slug")
    return InfraProjectSnapshotResource().import_data(cleaned)
def exportDivSchedules(self, startgameday, prefix=""):
    """Write elimination-tournament division schedules to <prefix>.xls.

    :param startgameday: minimum game id passed to the schedule query.
    :param prefix: file-name prefix for the output workbook.
    """
    headers = [
        'Match ID', 'Gameday#', 'Game Date', 'Day', 'Time', 'Division',
        'Home', 'Away', 'Field', '', 'Comment'
    ]
    datasheet_list = []
    for division in self.leaguedivinfo:
        div_id = division['div_id']
        div_age = division['div_age']
        div_gen = division['div_gen']
        div_str = div_age + div_gen
        datasheet = Dataset(title=div_str)
        datasheet.headers = list(headers)
        divdata_list = self.dbinterface.findElimTournDivisionSchedule(
            div_age, div_gen, min_game_id=startgameday)
        # One row per match; 24-hour start times converted to am/pm.
        tabformat_list = [
            (y[match_id_CONST], x[gameday_id_CONST],
             tournMapGamedayIdToCalendar(x[gameday_id_CONST]),
             tournMapGamedayIdToDate(x[gameday_id_CONST]),
             datetime.strptime(x[start_time_CONST],
                               "%H:%M").strftime("%I:%M %p"), div_str,
             y[home_CONST], y[away_CONST],
             self.fieldinfo[self.findexerGet(y[venue_CONST])]['name'], '',
             y[comment_CONST]) for x in divdata_list
            for y in x[gameday_data_CONST]
        ]
        for tabformat in tabformat_list:
            datasheet.append(tabformat)
        datasheet.append_separator(
            "Prefix Legend: 'S'-Seeded Team#, 'W'-Winning Team (See Match ID), 'L'-Losing Team)"
        )
        datasheet_list.append(datasheet)
    book = Databook(datasheet_list)
    bookname_xls = prefix + '.xls'
    booknamefull_xls = os.path.join(
        '/home/henry/workspace/datagraph/bottle_baseball/download/xls',
        bookname_xls)
    # The with-statement closes the file; the original's explicit f.close()
    # and its unused html-path / cdir locals were removed.
    with open(booknamefull_xls, 'wb') as f:
        f.write(book.xls)
def generate_divteamxls(self, div_id_property):
    """Write one workbook per division, with one sheet per team.

    :param div_id_property: key under which each division's id is stored.
    :return: list of ``{'path': <relative xls path>, 'mdata': <div_str>}``.
    """
    headers = [
        'Game Date', 'Day', 'Time', 'Division', 'Home', 'Visitor', 'Venue'
    ]
    file_list = list()
    for divinfo in self.divinfo_list:
        div_id = divinfo[div_id_property]
        div_age = divinfo['div_age']
        div_gen = divinfo['div_gen']
        div_str = div_age + div_gen
        totalteams = divinfo['totalteams']
        datasheet_list = list()
        teamrange = range(1, totalteams + 1)
        for team_id in teamrange:
            team_str = div_str + str(team_id)
            datasheet = Dataset(title=team_str)
            datasheet.headers = list(headers)
            match_list = self.sdbinterface.get_schedule('team_id',
                                                        div_age=div_age,
                                                        div_gen=div_gen,
                                                        team_id=team_id)
            # Game dates -> weekday abbreviations; 24-hour times -> am/pm.
            tabformat_list = [
                (x['game_date'], parser.parse(x['game_date']).strftime("%a"),
                 datetime.strptime(x['start_time'],
                                   "%H:%M").strftime("%I:%M%p"), div_str,
                 x['home'], x['away'],
                 self.fieldinfo_list[self.findexerGet(
                     x['venue'])]['field_name']) for x in match_list
            ]
            for tabformat in tabformat_list:
                datasheet.append(tabformat)
            datasheet_list.append(datasheet)
        book = Databook(datasheet_list)
        bookname_xls_relpath = self.schedcol_name + div_str + "_byTeam.xls"
        bookname_xls_fullpath = os.path.join(self.dir_path,
                                             bookname_xls_relpath)
        # The with-statement closes the file; the original's explicit
        # f.close() inside the block was redundant and has been removed.
        with open(bookname_xls_fullpath, 'wb') as f:
            f.write(book.xls)
        file_list.append({'path': bookname_xls_relpath, 'mdata': div_str})
    return file_list
def get_dataset(self):
    """Collect one sheet per metric that yields data.

    :return: a Databook of the non-empty metric datasets.
    :raises Http404: when no metric produced any rows.
    """
    sheets = []
    for metric in sorted(self.metrics):
        dataset_cls = self.get_dataset_class(metric)
        if not dataset_cls:
            continue
        gardens = self.get_gardens()
        ds = dataset_cls(
            gardens=gardens,
            measurement_system=find_preferred_measurement_system(gardens),
        )
        # Skip sheets with no rows entirely.
        if not ds.height:
            continue
        # Sheet titles: at most 31 characters and no ':' allowed.
        ds.title = metric[:31].replace(':', '')
        sheets.append(ds)
    if not sheets:
        raise Http404
    return Databook(sheets)
def exportFieldSchedule(self, startgameday, prefix=""):
    """Write per-field elimination-tournament schedules to <prefix>_byField.xls.

    :param startgameday: minimum game id passed to the schedule query.
    :param prefix: file-name prefix for the output workbook.
    """
    headers = [
        'Game#', 'Date', 'Day', 'Time', 'Division', 'Round', 'Home', 'Visitor'
    ]
    datasheet_list = []
    for field in self.fieldinfo:
        field_name = field['name']
        field_id = field['field_id']
        datasheet = Dataset(title=field_name)
        datasheet.headers = list(headers)
        fielddata_list = self.dbinterface.findFieldSchedule(
            field_id, min_game_id=startgameday, tourntype='E')
        # One row per match; 24-hour start times converted to am/pm.
        tabformat_list = [
            (x[match_id_CONST],
             tournMapGamedayIdToCalendar(x[gameday_id_CONST]),
             tournMapGamedayIdToDate(x[gameday_id_CONST]),
             datetime.strptime(x[start_time_CONST],
                               "%H:%M").strftime("%I:%M %p"),
             x[age_CONST] + x[gen_CONST], x[round_CONST], x[home_CONST],
             x[away_CONST]) for x in fielddata_list
        ]
        for tabformat in tabformat_list:
            datasheet.append(tabformat)
        datasheet_list.append(datasheet)
    book = Databook(datasheet_list)
    bookname_xls = prefix + '_byField.xls'
    booknamefull_xls = os.path.join(
        '/home/henry/workspace/datagraph/bottle_baseball/download/xls',
        bookname_xls)
    # The with-statement closes the file; the original's redundant f.close(),
    # unused cdir/html locals, and commented-out separator call were removed.
    with open(booknamefull_xls, 'wb') as f:
        f.write(book.xls)
def exportDivTeamSchedules(self, div_id, age, gen, numteams, prefix=""): headers = [ 'Gameday#', 'Game Date', 'Day', 'Start Time', 'Venue', 'Home Team', 'Away Team' ] datasheet_list = [] for team_id in range(1, numteams + 1): team_str = age + gen + str(team_id) datasheet = Dataset(title=team_str) datasheet.headers = list(headers) teamdata_list = self.dbinterface.findTeamSchedule( age, gen, team_id) tabformat_list = [ (x[gameday_id_CONST], tournMapGamedayIdToCalendar(x[gameday_id_CONST]), tournMapGamedayIdToDate(x[gameday_id_CONST]), datetime.strptime(x[start_time_CONST], "%H:%M").strftime("%I:%M %p"), self.fieldinfo[self.findexerGet(x[venue_CONST])]['name'], x[home_CONST], x[away_CONST]) for x in teamdata_list ] for tabformat in tabformat_list: datasheet.append(tabformat) datasheet_list.append(datasheet) book = Databook(datasheet_list) cdir = os.path.dirname(__file__) bookname_xls = prefix + age + gen + '_schedule.xls' bookname_html = prefix + age + gen + '_schedule.html' booknamefull_xls = os.path.join( '/home/henry/workspace/datagraph/bottle_baseball/download/xls', bookname_xls) booknamefull_html = os.path.join( '~/workspace/datagraph/bottle_baseball/download/html', bookname_html) with open(booknamefull_xls, 'wb') as f: f.write(book.xls) f.close() '''
def get_dataset(self):
    """Build the public download: one sheet per metric with data.

    Garden identifiers are replaced by obfuscated ids, consistent across
    all sheets of one download via ``garden_mapping``.

    :return: a Databook of the non-empty metric datasets.
    :raises Http404: when no metric produced any rows.
    """
    datasets = []
    # Mapping of gardens to obfuscated ids, global for each download
    garden_mapping = {}
    for metric in sorted(self.metrics):
        dataset_cls = metric['public_dataset']
        if not dataset_cls:
            continue
        ds = dataset_cls(filters=self.get_queryset_filters(self.request),
                         measurement_system='imperial')
        # Replace garden column with a randomized unique id
        try:
            index = ds.headers.index('garden')
            ds.insert_col(
                index,
                lambda r: obfuscated_garden(garden_mapping, r, index),
                header='garden unique id')
            del ds['garden']
        except (ValueError, IndexError):
            # BUG FIX: list.index raises ValueError (not IndexError) when
            # the 'garden' column is absent, so the original handler never
            # caught the intended case. Catch both and ignore.
            pass
        # Only append if there is data to append
        if not ds.height:
            continue
        # Sheet titles can be 31 characters at most, cannot contain :s
        ds.title = metric['name'][:31].replace(':', '')
        datasets.append(ds)
    if not datasets:
        raise Http404
    return Databook(datasets)
# get_user_usage(default_database,867838020061714) # uu = UserUsage(get_user_usage(default_database, 867838020061714), imei= 867838020061714) # print(uu.package_set) # print(len(uu.package_set)) # print(uu.is_dial()) # print(uu.get_total_usage_time()) def get_package_name(package): return None book = Databook() for database in database_list: package_active = {} user_dataset = Dataset() package_dateset = Dataset() user_headers = ('imei', "总使用时长", "总使用应用数") package_headers = ("package", "应用名称", "总使用时长", "总使用人数", "人均使用时长", "日活跃率") user_data = [] package_data = [] user_list = get_user_list(database) total_user = len(user_list)
except FileNotFoundError as e: print("the input file path is:", excel_name, "error happens:", e) return None except KeyError as ex: print("the input sheet name is:", sheet_name, "error happens", ex) return None return package_list # 根据输入的 wanted_package_category_work_book = openpyxl.load_workbook(excel_in_path) wanted_package_category_list = wanted_package_category_work_book.get_sheet_names( ) model_list = ["x6app", "x6plusapp", "y37app", "xplay5app"] # hardcode 需要进一步更改 general_result = Databook() # 记录所有的机型和app类别安装信息 for model in model_list: model_general_result = Dataset() # 结果的第一个汇总值的内容 for package_category in wanted_package_category_list: general_category_ wanted_packages = get_wanted_packages(excel_in_path, package_category) imei_set = set() package_count = 0 model_specific_category_result = {} # 记录特定类别中单独应用的使用人数 for package in wanted_packages: package_count += 1 print(model, "package category is ", package_category, "percent:", package_count / len(wanted_packages)) imei_list = get_imei_from_package(model, package) imei_set = set(imei_list) | imei_set # 记录特定类别的整体使用人数
def databook(data):
    """Build a Databook from `data`, a tuple of datasets."""
    book = Databook(data)
    return book
def get_whole_results(output_dir_path="./"):
    """
    Compute per-category app-usage results and write them out as Excel files
    (summary.xlsx and details.xlsx). Output defaults to Excel but could later
    be swapped for JSON etc. for web display.

    Relies on module-level state: phone_model, package_users,
    phone_user_number_list, marker_apps_excel.

    :param output_dir_path: directory the output files are written to
    :return: None
    """
    work_book = Databook()
    category_info = get_all_category(marker_apps_excel)
    common_headers = []
    for model in phone_model:
        common_headers.append(model)
    summary = {}
    for category in category_info:
        # First column header is the category name itself.
        category_headers = list(common_headers)
        category_headers.insert(0, category)
        data_list = []
        package_list = category_info[category]
        for package in package_list:
            package_data = [get_package_name(package)]
            for model in phone_model:
                try:
                    user_number = package_users[model][package]["user_num"]
                except KeyError as ke:
                    # Missing package on this model counts as zero users.
                    user_number = 0
                package_data.append(str(user_number))
            data_list.append(tuple(package_data))
        # Trailing row: de-duplicated user count per model for the category.
        unique_user = ["unique user"]
        print(data_list)
        for model in phone_model:
            unique_user_number = len(
                get_union_user(package_list, package_users[model]))
            unique_user.append(unique_user_number)
        summary[category] = unique_user[1:]
        data_list.append(tuple(unique_user))
        work_sheet = Dataset(*data_list,
                             title=category,
                             headers=category_headers)
        work_book.add_sheet(work_sheet)
    # Build the summary sheet: per-category usage rate per model.
    summary_data = []
    for category in summary:
        user_numbers = summary[category]
        rate_line = [category]
        for index in range(len(user_numbers)):
            # NOTE(review): assumes phone_user_number_list is ordered the
            # same as phone_model — confirm at the definition site.
            rate_line.append(
                int(user_numbers[index]) / phone_user_number_list[index])
        # for model in phone_model:
        #     rate_line.append(int(user_number)/phone_model_user_number[model])
        summary_data.append(tuple(rate_line))
    summary_headers = ["分类"]
    for model in phone_model:
        summary_headers.append(model)
    import time  # NOTE(review): unused import
    summary_sheet = Dataset(*summary_data,
                            headers=summary_headers,
                            title="summary")
    with open(output_dir_path + "summary.xlsx", "wb") as f:
        f.write(summary_sheet.xlsx)
    with open(output_dir_path + "details.xlsx", "wb") as f:
        f.write(work_book.xlsx)
"packages": user_packages_line[2:] } user_packages[model] = temp_user_packages with open(package_user_dir + "\\"+ model + "_package_users.txt", "r") as f: for line in f: package_users_line = line.split(",") temp_package_users[package_users_line[0]] = { "user_num": package_users_line[1], "user_list": package_users_line[2:] } package_users[model] = temp_package_users load_data() work_book = Databook() TOP_N = 1000 for model in package_users: headers = ("应用包名", "应用名称", "应用安装数量", "一级分类", "二级分类", "是否预装", "安装比率") data = [] package_ranks[model] = get_package_rank(package_users[model]) print("model is ", model) top_apps = get_top_apps(package_ranks[model], TOP_N, start=0) print(top_apps) for index in range(len(top_apps)): print() package_detail = get_package_details(top_apps[index][0], model) data.append((package_detail["package"], package_detail["name"], package_detail["numbers"], package_detail["first_category"], package_detail["second_category"], package_detail["is_prestall"], int(package_detail["numbers"]) / phone_model_user_number[model]))
def save(self):
    """Write self.data as a single-sheet workbook to <prefix>.<out_type>."""
    self.data.title = self.out_sheet
    workbook = Databook()
    workbook.add_sheet(self.data)
    out_path = self.out_fname_prefix + '.' + self.out_type
    with open(out_path, 'wb') as out:
        out.write(workbook.export(self.out_type))
def get(self, request):
    """Return all back-data sheets bundled as a downloadable .xls file."""
    book = Databook()
    book.add_sheet(self.bilateral_table_data_sheet())
    book.add_sheet(self.multilateral_and_foundation_table_data_sheet())
    book.add_sheet(self.five_largest_graph_data_sheet())
    book.add_sheet(self.seven_largest_single_data_sheet())
    book.add_sheet(self.other_disbursements_data_sheet())
    # `mimetype` was removed from HttpResponse in Django 1.7; `content_type`
    # has been accepted since Django 1.0, so this is safe on all versions.
    response = HttpResponse(book.xls, content_type='application/ms-excel')
    response['Content-Disposition'] = (
        'attachment; filename=%s.xls' % u"back_data")
    return response
def get_whole_results(output_dir_path="./"):
    """
    Compute per-category app-usage results and write them out as Excel files
    (summary.xlsx and details.xlsx). Output defaults to Excel but could later
    be swapped for JSON etc. for web display.

    Relies on module-level state: phone_model, package_users,
    phone_user_number_list, marker_apps_excel.

    :param output_dir_path: directory the output files are written to
    :return: None
    """
    work_book = Databook()
    category_info = get_all_category(marker_apps_excel)
    common_headers = []
    for model in phone_model:
        common_headers.append(model)
    summary = {}
    for category in category_info:
        # First column header is the category name itself.
        category_headers = list(common_headers)
        category_headers.insert(0, category)
        data_list = []
        package_list = category_info[category]
        for package in package_list:
            package_data = [get_package_name(package)]
            for model in phone_model:
                try:
                    user_number = package_users[model][package]["user_num"]
                except KeyError:
                    # Missing package on this model counts as zero users.
                    # (Unused `as ke` binding from the original removed.)
                    user_number = 0
                package_data.append(str(user_number))
            data_list.append(tuple(package_data))
        # Trailing row: de-duplicated user count per model for the category.
        unique_user = ["unique user"]
        print(data_list)
        for model in phone_model:
            unique_user_number = len(
                get_union_user(package_list, package_users[model]))
            unique_user.append(unique_user_number)
        summary[category] = unique_user[1:]
        data_list.append(tuple(unique_user))
        work_sheet = Dataset(*data_list,
                             title=category,
                             headers=category_headers)
        work_book.add_sheet(work_sheet)
    # Build the summary sheet: per-category usage rate per model.
    # NOTE(review): assumes phone_user_number_list is ordered the same as
    # phone_model — confirm at the definition site.
    summary_data = []
    for category in summary:
        user_numbers = summary[category]
        rate_line = [category]
        for index in range(len(user_numbers)):
            rate_line.append(
                int(user_numbers[index]) / phone_user_number_list[index])
        summary_data.append(tuple(rate_line))
    summary_headers = ["分类"]
    for model in phone_model:
        summary_headers.append(model)
    # Unused `import time` and dead commented-out code from the original
    # were removed.
    summary_sheet = Dataset(*summary_data,
                            headers=summary_headers,
                            title="summary")
    with open(output_dir_path + "summary.xlsx", "wb") as f:
        f.write(summary_sheet.xlsx)
    with open(output_dir_path + "details.xlsx", "wb") as f:
        f.write(work_book.xlsx)